/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
31 #include <linux/audit.h>
32 #include <linux/bpf.h>
33 #include <linux/filter.h>
34 #include <linux/seccomp.h>
35 #include <linux/unistd.h>
40 #include <sys/prctl.h>
44 #include <llvm/Config/llvm-config.h>
46 #include "radv_debug.h"
47 #include "radv_private.h"
48 #include "radv_shader.h"
50 #include "util/disk_cache.h"
54 #include <amdgpu_drm.h>
55 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
56 #include "ac_llvm_util.h"
57 #include "vk_format.h"
60 #include "util/build_id.h"
61 #include "util/debug.h"
62 #include "util/mesa-sha1.h"
63 #include "util/timespec.h"
64 #include "util/u_atomic.h"
65 #include "compiler/glsl_types.h"
66 #include "util/xmlpool.h"
/* Forward declarations for the timeline-semaphore helpers defined later in
 * this file.
 * NOTE(review): the trailing parameter of the two point helpers is not
 * visible in this extraction; restored as the timeline payload value
 * (uint64_t) — confirm against the definitions below. */
static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device,
                                         struct radv_timeline *timeline,
                                         uint64_t p);

static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device,
                               struct radv_timeline *timeline,
                               uint64_t p);

static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                     struct list_head *processing_list);

void radv_destroy_semaphore_part(struct radv_device *device,
                                 struct radv_semaphore_part *part);
87 radv_device_get_cache_uuid(enum radeon_family family
, void *uuid
)
90 unsigned char sha1
[20];
91 unsigned ptr_size
= sizeof(void*);
93 memset(uuid
, 0, VK_UUID_SIZE
);
94 _mesa_sha1_init(&ctx
);
96 if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid
, &ctx
) ||
97 !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo
, &ctx
))
100 _mesa_sha1_update(&ctx
, &family
, sizeof(family
));
101 _mesa_sha1_update(&ctx
, &ptr_size
, sizeof(ptr_size
));
102 _mesa_sha1_final(&ctx
, sha1
);
104 memcpy(uuid
, sha1
, VK_UUID_SIZE
);
109 radv_get_driver_uuid(void *uuid
)
111 ac_compute_driver_uuid(uuid
, VK_UUID_SIZE
);
115 radv_get_device_uuid(struct radeon_info
*info
, void *uuid
)
117 ac_compute_device_uuid(info
, uuid
, VK_UUID_SIZE
);
121 radv_get_visible_vram_size(struct radv_physical_device
*device
)
123 return MIN2(device
->rad_info
.vram_size
, device
->rad_info
.vram_vis_size
);
127 radv_get_vram_size(struct radv_physical_device
*device
)
129 return device
->rad_info
.vram_size
- radv_get_visible_vram_size(device
);
133 radv_is_mem_type_vram(enum radv_mem_type type
)
135 return type
== RADV_MEM_TYPE_VRAM
||
136 type
== RADV_MEM_TYPE_VRAM_UNCACHED
;
140 radv_is_mem_type_vram_visible(enum radv_mem_type type
)
142 return type
== RADV_MEM_TYPE_VRAM_CPU_ACCESS
||
143 type
== RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED
;
146 radv_is_mem_type_gtt_wc(enum radv_mem_type type
)
148 return type
== RADV_MEM_TYPE_GTT_WRITE_COMBINE
||
149 type
== RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED
;
153 radv_is_mem_type_gtt_cached(enum radv_mem_type type
)
155 return type
== RADV_MEM_TYPE_GTT_CACHED
||
156 type
== RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED
;
160 radv_is_mem_type_uncached(enum radv_mem_type type
)
162 return type
== RADV_MEM_TYPE_VRAM_UNCACHED
||
163 type
== RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED
||
164 type
== RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED
||
165 type
== RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED
;
169 radv_physical_device_init_mem_types(struct radv_physical_device
*device
)
171 STATIC_ASSERT(RADV_MEM_HEAP_COUNT
<= VK_MAX_MEMORY_HEAPS
);
172 uint64_t visible_vram_size
= radv_get_visible_vram_size(device
);
173 uint64_t vram_size
= radv_get_vram_size(device
);
174 int vram_index
= -1, visible_vram_index
= -1, gart_index
= -1;
175 device
->memory_properties
.memoryHeapCount
= 0;
177 vram_index
= device
->memory_properties
.memoryHeapCount
++;
178 device
->memory_properties
.memoryHeaps
[vram_index
] = (VkMemoryHeap
) {
180 .flags
= VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
,
183 if (visible_vram_size
) {
184 visible_vram_index
= device
->memory_properties
.memoryHeapCount
++;
185 device
->memory_properties
.memoryHeaps
[visible_vram_index
] = (VkMemoryHeap
) {
186 .size
= visible_vram_size
,
187 .flags
= VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
,
190 if (device
->rad_info
.gart_size
> 0) {
191 gart_index
= device
->memory_properties
.memoryHeapCount
++;
192 device
->memory_properties
.memoryHeaps
[gart_index
] = (VkMemoryHeap
) {
193 .size
= device
->rad_info
.gart_size
,
194 .flags
= device
->rad_info
.has_dedicated_vram
? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
,
198 STATIC_ASSERT(RADV_MEM_TYPE_COUNT
<= VK_MAX_MEMORY_TYPES
);
199 unsigned type_count
= 0;
200 if (vram_index
>= 0) {
201 device
->mem_type_indices
[type_count
] = RADV_MEM_TYPE_VRAM
;
202 device
->memory_properties
.memoryTypes
[type_count
++] = (VkMemoryType
) {
203 .propertyFlags
= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
,
204 .heapIndex
= vram_index
,
207 if (gart_index
>= 0 && device
->rad_info
.has_dedicated_vram
) {
208 device
->mem_type_indices
[type_count
] = RADV_MEM_TYPE_GTT_WRITE_COMBINE
;
209 device
->memory_properties
.memoryTypes
[type_count
++] = (VkMemoryType
) {
210 .propertyFlags
= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
211 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
,
212 .heapIndex
= gart_index
,
215 if (visible_vram_index
>= 0) {
216 device
->mem_type_indices
[type_count
] = RADV_MEM_TYPE_VRAM_CPU_ACCESS
;
217 device
->memory_properties
.memoryTypes
[type_count
++] = (VkMemoryType
) {
218 .propertyFlags
= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
|
219 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
220 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
,
221 .heapIndex
= visible_vram_index
,
224 if (gart_index
>= 0 && !device
->rad_info
.has_dedicated_vram
) {
225 /* Put GTT after visible VRAM for GPUs without dedicated VRAM
226 * as they have identical property flags, and according to the
227 * spec, for types with identical flags, the one with greater
228 * performance must be given a lower index. */
229 device
->mem_type_indices
[type_count
] = RADV_MEM_TYPE_GTT_WRITE_COMBINE
;
230 device
->memory_properties
.memoryTypes
[type_count
++] = (VkMemoryType
) {
231 .propertyFlags
= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
|
232 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
233 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
,
234 .heapIndex
= gart_index
,
237 if (gart_index
>= 0) {
238 device
->mem_type_indices
[type_count
] = RADV_MEM_TYPE_GTT_CACHED
;
239 device
->memory_properties
.memoryTypes
[type_count
++] = (VkMemoryType
) {
240 .propertyFlags
= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
|
241 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
|
242 VK_MEMORY_PROPERTY_HOST_CACHED_BIT
|
243 (device
->rad_info
.has_dedicated_vram
? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
),
244 .heapIndex
= gart_index
,
247 device
->memory_properties
.memoryTypeCount
= type_count
;
249 if (device
->rad_info
.has_l2_uncached
) {
250 for (int i
= 0; i
< device
->memory_properties
.memoryTypeCount
; i
++) {
251 VkMemoryType mem_type
= device
->memory_properties
.memoryTypes
[i
];
253 if ((mem_type
.propertyFlags
& (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
|
254 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
)) ||
255 mem_type
.propertyFlags
== VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
) {
256 enum radv_mem_type mem_type_id
;
258 switch (device
->mem_type_indices
[i
]) {
259 case RADV_MEM_TYPE_VRAM
:
260 mem_type_id
= RADV_MEM_TYPE_VRAM_UNCACHED
;
262 case RADV_MEM_TYPE_VRAM_CPU_ACCESS
:
263 mem_type_id
= RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED
;
265 case RADV_MEM_TYPE_GTT_WRITE_COMBINE
:
266 mem_type_id
= RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED
;
268 case RADV_MEM_TYPE_GTT_CACHED
:
269 mem_type_id
= RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED
;
272 unreachable("invalid memory type");
275 VkMemoryPropertyFlags property_flags
= mem_type
.propertyFlags
|
276 VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD
|
277 VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD
;
279 device
->mem_type_indices
[type_count
] = mem_type_id
;
280 device
->memory_properties
.memoryTypes
[type_count
++] = (VkMemoryType
) {
281 .propertyFlags
= property_flags
,
282 .heapIndex
= mem_type
.heapIndex
,
286 device
->memory_properties
.memoryTypeCount
= type_count
;
291 radv_handle_env_var_force_family(struct radv_physical_device
*device
)
293 const char *family
= getenv("RADV_FORCE_FAMILY");
299 for (i
= CHIP_TAHITI
; i
< CHIP_LAST
; i
++) {
300 if (!strcmp(family
, ac_get_llvm_processor_name(i
))) {
301 /* Override family and chip_class. */
302 device
->rad_info
.family
= i
;
304 if (i
>= CHIP_NAVI10
)
305 device
->rad_info
.chip_class
= GFX10
;
306 else if (i
>= CHIP_VEGA10
)
307 device
->rad_info
.chip_class
= GFX9
;
308 else if (i
>= CHIP_TONGA
)
309 device
->rad_info
.chip_class
= GFX8
;
310 else if (i
>= CHIP_BONAIRE
)
311 device
->rad_info
.chip_class
= GFX7
;
313 device
->rad_info
.chip_class
= GFX6
;
319 fprintf(stderr
, "radv: Unknown family: %s\n", family
);
324 radv_physical_device_init(struct radv_physical_device
*device
,
325 struct radv_instance
*instance
,
326 drmDevicePtr drm_device
)
328 const char *path
= drm_device
->nodes
[DRM_NODE_RENDER
];
330 drmVersionPtr version
;
334 fd
= open(path
, O_RDWR
| O_CLOEXEC
);
336 if (instance
->debug_flags
& RADV_DEBUG_STARTUP
)
337 radv_logi("Could not open device '%s'", path
);
339 return vk_error(instance
, VK_ERROR_INCOMPATIBLE_DRIVER
);
342 version
= drmGetVersion(fd
);
346 if (instance
->debug_flags
& RADV_DEBUG_STARTUP
)
347 radv_logi("Could not get the kernel driver version for device '%s'", path
);
349 return vk_errorf(instance
, VK_ERROR_INCOMPATIBLE_DRIVER
,
350 "failed to get version %s: %m", path
);
353 if (strcmp(version
->name
, "amdgpu")) {
354 drmFreeVersion(version
);
357 if (instance
->debug_flags
& RADV_DEBUG_STARTUP
)
358 radv_logi("Device '%s' is not using the amdgpu kernel driver.", path
);
360 return VK_ERROR_INCOMPATIBLE_DRIVER
;
362 drmFreeVersion(version
);
364 if (instance
->debug_flags
& RADV_DEBUG_STARTUP
)
365 radv_logi("Found compatible device '%s'.", path
);
367 device
->_loader_data
.loaderMagic
= ICD_LOADER_MAGIC
;
368 device
->instance
= instance
;
370 device
->ws
= radv_amdgpu_winsys_create(fd
, instance
->debug_flags
,
371 instance
->perftest_flags
);
373 result
= vk_error(instance
, VK_ERROR_INCOMPATIBLE_DRIVER
);
377 if (instance
->enabled_extensions
.KHR_display
) {
378 master_fd
= open(drm_device
->nodes
[DRM_NODE_PRIMARY
], O_RDWR
| O_CLOEXEC
);
379 if (master_fd
>= 0) {
380 uint32_t accel_working
= 0;
381 struct drm_amdgpu_info request
= {
382 .return_pointer
= (uintptr_t)&accel_working
,
383 .return_size
= sizeof(accel_working
),
384 .query
= AMDGPU_INFO_ACCEL_WORKING
387 if (drmCommandWrite(master_fd
, DRM_AMDGPU_INFO
, &request
, sizeof (struct drm_amdgpu_info
)) < 0 || !accel_working
) {
394 device
->master_fd
= master_fd
;
395 device
->local_fd
= fd
;
396 device
->ws
->query_info(device
->ws
, &device
->rad_info
);
398 radv_handle_env_var_force_family(device
);
400 device
->use_aco
= instance
->perftest_flags
& RADV_PERFTEST_ACO
;
401 if (device
->rad_info
.chip_class
< GFX7
&& device
->use_aco
) {
402 fprintf(stderr
, "WARNING: disabling ACO on unsupported GPUs.\n");
403 device
->use_aco
= false;
406 snprintf(device
->name
, sizeof(device
->name
),
407 "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING
")", device
->use_aco
? "/ACO" : "",
408 device
->rad_info
.name
);
410 if (radv_device_get_cache_uuid(device
->rad_info
.family
, device
->cache_uuid
)) {
411 device
->ws
->destroy(device
->ws
);
412 result
= vk_errorf(instance
, VK_ERROR_INITIALIZATION_FAILED
,
413 "cannot generate UUID");
417 /* These flags affect shader compilation. */
418 uint64_t shader_env_flags
=
419 (device
->instance
->perftest_flags
& RADV_PERFTEST_SISCHED
? 0x1 : 0) |
420 (device
->use_aco
? 0x2 : 0);
422 /* The gpu id is already embedded in the uuid so we just pass "radv"
423 * when creating the cache.
425 char buf
[VK_UUID_SIZE
* 2 + 1];
426 disk_cache_format_hex_id(buf
, device
->cache_uuid
, VK_UUID_SIZE
* 2);
427 device
->disk_cache
= disk_cache_create(device
->name
, buf
, shader_env_flags
);
429 if (device
->rad_info
.chip_class
< GFX8
||
430 device
->rad_info
.chip_class
> GFX9
)
431 fprintf(stderr
, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
433 radv_get_driver_uuid(&device
->driver_uuid
);
434 radv_get_device_uuid(&device
->rad_info
, &device
->device_uuid
);
436 device
->out_of_order_rast_allowed
= device
->rad_info
.has_out_of_order_rast
&&
437 !(device
->instance
->debug_flags
& RADV_DEBUG_NO_OUT_OF_ORDER
);
439 device
->dcc_msaa_allowed
=
440 (device
->instance
->perftest_flags
& RADV_PERFTEST_DCC_MSAA
);
442 device
->use_shader_ballot
= (device
->use_aco
&& device
->rad_info
.chip_class
>= GFX8
) ||
443 (device
->instance
->perftest_flags
& RADV_PERFTEST_SHADER_BALLOT
);
445 device
->use_ngg
= device
->rad_info
.chip_class
>= GFX10
&&
446 device
->rad_info
.family
!= CHIP_NAVI14
&&
447 !(device
->instance
->debug_flags
& RADV_DEBUG_NO_NGG
);
448 if (device
->use_aco
&& device
->use_ngg
) {
449 fprintf(stderr
, "WARNING: disabling NGG because ACO is used.\n");
450 device
->use_ngg
= false;
453 device
->use_ngg_streamout
= false;
455 /* Determine the number of threads per wave for all stages. */
456 device
->cs_wave_size
= 64;
457 device
->ps_wave_size
= 64;
458 device
->ge_wave_size
= 64;
460 if (device
->rad_info
.chip_class
>= GFX10
) {
461 if (device
->instance
->perftest_flags
& RADV_PERFTEST_CS_WAVE_32
)
462 device
->cs_wave_size
= 32;
464 /* For pixel shaders, wave64 is recommanded. */
465 if (device
->instance
->perftest_flags
& RADV_PERFTEST_PS_WAVE_32
)
466 device
->ps_wave_size
= 32;
468 if (device
->instance
->perftest_flags
& RADV_PERFTEST_GE_WAVE_32
)
469 device
->ge_wave_size
= 32;
472 radv_physical_device_init_mem_types(device
);
473 radv_fill_device_extension_table(device
, &device
->supported_extensions
);
475 device
->bus_info
= *drm_device
->businfo
.pci
;
477 if ((device
->instance
->debug_flags
& RADV_DEBUG_INFO
))
478 ac_print_gpu_info(&device
->rad_info
);
480 /* The WSI is structured as a layer on top of the driver, so this has
481 * to be the last part of initialization (at least until we get other
484 result
= radv_init_wsi(device
);
485 if (result
!= VK_SUCCESS
) {
486 device
->ws
->destroy(device
->ws
);
487 vk_error(instance
, result
);
501 radv_physical_device_finish(struct radv_physical_device
*device
)
503 radv_finish_wsi(device
);
504 device
->ws
->destroy(device
->ws
);
505 disk_cache_destroy(device
->disk_cache
);
506 close(device
->local_fd
);
507 if (device
->master_fd
!= -1)
508 close(device
->master_fd
);
512 default_alloc_func(void *pUserData
, size_t size
, size_t align
,
513 VkSystemAllocationScope allocationScope
)
519 default_realloc_func(void *pUserData
, void *pOriginal
, size_t size
,
520 size_t align
, VkSystemAllocationScope allocationScope
)
522 return realloc(pOriginal
, size
);
/* Default VkAllocationCallbacks free: plain free. */
static void
default_free_func(void *pUserData, void *pMemory)
{
    free(pMemory);
}
531 static const VkAllocationCallbacks default_alloc
= {
533 .pfnAllocation
= default_alloc_func
,
534 .pfnReallocation
= default_realloc_func
,
535 .pfnFree
= default_free_func
,
538 static const struct debug_control radv_debug_options
[] = {
539 {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS
},
540 {"nodcc", RADV_DEBUG_NO_DCC
},
541 {"shaders", RADV_DEBUG_DUMP_SHADERS
},
542 {"nocache", RADV_DEBUG_NO_CACHE
},
543 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS
},
544 {"nohiz", RADV_DEBUG_NO_HIZ
},
545 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE
},
546 {"allbos", RADV_DEBUG_ALL_BOS
},
547 {"noibs", RADV_DEBUG_NO_IBS
},
548 {"spirv", RADV_DEBUG_DUMP_SPIRV
},
549 {"vmfaults", RADV_DEBUG_VM_FAULTS
},
550 {"zerovram", RADV_DEBUG_ZERO_VRAM
},
551 {"syncshaders", RADV_DEBUG_SYNC_SHADERS
},
552 {"nosisched", RADV_DEBUG_NO_SISCHED
},
553 {"preoptir", RADV_DEBUG_PREOPTIR
},
554 {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS
},
555 {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER
},
556 {"info", RADV_DEBUG_INFO
},
557 {"errors", RADV_DEBUG_ERRORS
},
558 {"startup", RADV_DEBUG_STARTUP
},
559 {"checkir", RADV_DEBUG_CHECKIR
},
560 {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM
},
561 {"nobinning", RADV_DEBUG_NOBINNING
},
562 {"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT
},
563 {"nongg", RADV_DEBUG_NO_NGG
},
564 {"noshaderballot", RADV_DEBUG_NO_SHADER_BALLOT
},
565 {"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS
},
566 {"metashaders", RADV_DEBUG_DUMP_META_SHADERS
},
567 {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE
},
572 radv_get_debug_option_name(int id
)
574 assert(id
< ARRAY_SIZE(radv_debug_options
) - 1);
575 return radv_debug_options
[id
].string
;
578 static const struct debug_control radv_perftest_options
[] = {
579 {"nobatchchain", RADV_PERFTEST_NO_BATCHCHAIN
},
580 {"sisched", RADV_PERFTEST_SISCHED
},
581 {"localbos", RADV_PERFTEST_LOCAL_BOS
},
582 {"dccmsaa", RADV_PERFTEST_DCC_MSAA
},
583 {"bolist", RADV_PERFTEST_BO_LIST
},
584 {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT
},
585 {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK
},
586 {"cswave32", RADV_PERFTEST_CS_WAVE_32
},
587 {"pswave32", RADV_PERFTEST_PS_WAVE_32
},
588 {"gewave32", RADV_PERFTEST_GE_WAVE_32
},
589 {"dfsm", RADV_PERFTEST_DFSM
},
590 {"aco", RADV_PERFTEST_ACO
},
595 radv_get_perftest_option_name(int id
)
597 assert(id
< ARRAY_SIZE(radv_perftest_options
) - 1);
598 return radv_perftest_options
[id
].string
;
602 radv_handle_per_app_options(struct radv_instance
*instance
,
603 const VkApplicationInfo
*info
)
605 const char *name
= info
? info
->pApplicationName
: NULL
;
610 if (!strcmp(name
, "Talos - Linux - 32bit") ||
611 !strcmp(name
, "Talos - Linux - 64bit")) {
612 if (!(instance
->debug_flags
& RADV_DEBUG_NO_SISCHED
)) {
613 /* Force enable LLVM sisched for Talos because it looks
614 * safe and it gives few more FPS.
616 instance
->perftest_flags
|= RADV_PERFTEST_SISCHED
;
618 } else if (!strcmp(name
, "DOOM_VFR")) {
619 /* Work around a Doom VFR game bug */
620 instance
->debug_flags
|= RADV_DEBUG_NO_DYNAMIC_BOUNDS
;
621 } else if (!strcmp(name
, "MonsterHunterWorld.exe")) {
622 /* Workaround for a WaW hazard when LLVM moves/merges
623 * load/store memory operations.
624 * See https://reviews.llvm.org/D61313
626 if (LLVM_VERSION_MAJOR
< 9)
627 instance
->debug_flags
|= RADV_DEBUG_NO_LOAD_STORE_OPT
;
628 } else if (!strcmp(name
, "Wolfenstein: Youngblood")) {
629 if (!(instance
->debug_flags
& RADV_DEBUG_NO_SHADER_BALLOT
) &&
630 !(instance
->perftest_flags
& RADV_PERFTEST_ACO
)) {
631 /* Force enable VK_AMD_shader_ballot because it looks
632 * safe and it gives a nice boost (+20% on Vega 56 at
633 * this time). It also prevents corruption on LLVM.
635 instance
->perftest_flags
|= RADV_PERFTEST_SHADER_BALLOT
;
637 } else if (!strcmp(name
, "Fledge")) {
639 * Zero VRAM for "The Surge 2"
641 * This avoid a hang when when rendering any level. Likely
642 * uninitialized data in an indirect draw.
644 instance
->debug_flags
|= RADV_DEBUG_ZERO_VRAM
;
648 static int radv_get_instance_extension_index(const char *name
)
650 for (unsigned i
= 0; i
< RADV_INSTANCE_EXTENSION_COUNT
; ++i
) {
651 if (strcmp(name
, radv_instance_extensions
[i
].extensionName
) == 0)
657 static const char radv_dri_options_xml
[] =
659 DRI_CONF_SECTION_PERFORMANCE
660 DRI_CONF_ADAPTIVE_SYNC("true")
661 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
662 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT("false")
666 static void radv_init_dri_options(struct radv_instance
*instance
)
668 driParseOptionInfo(&instance
->available_dri_options
, radv_dri_options_xml
);
669 driParseConfigFiles(&instance
->dri_options
,
670 &instance
->available_dri_options
,
672 instance
->engineName
,
673 instance
->engineVersion
);
676 VkResult
radv_CreateInstance(
677 const VkInstanceCreateInfo
* pCreateInfo
,
678 const VkAllocationCallbacks
* pAllocator
,
679 VkInstance
* pInstance
)
681 struct radv_instance
*instance
;
684 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO
);
686 uint32_t client_version
;
687 if (pCreateInfo
->pApplicationInfo
&&
688 pCreateInfo
->pApplicationInfo
->apiVersion
!= 0) {
689 client_version
= pCreateInfo
->pApplicationInfo
->apiVersion
;
691 client_version
= VK_API_VERSION_1_0
;
694 const char *engine_name
= NULL
;
695 uint32_t engine_version
= 0;
696 if (pCreateInfo
->pApplicationInfo
) {
697 engine_name
= pCreateInfo
->pApplicationInfo
->pEngineName
;
698 engine_version
= pCreateInfo
->pApplicationInfo
->engineVersion
;
701 instance
= vk_zalloc2(&default_alloc
, pAllocator
, sizeof(*instance
), 8,
702 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE
);
704 return vk_error(NULL
, VK_ERROR_OUT_OF_HOST_MEMORY
);
706 instance
->_loader_data
.loaderMagic
= ICD_LOADER_MAGIC
;
709 instance
->alloc
= *pAllocator
;
711 instance
->alloc
= default_alloc
;
713 instance
->apiVersion
= client_version
;
714 instance
->physicalDeviceCount
= -1;
716 /* Get secure compile thread count. NOTE: We cap this at 32 */
717 #define MAX_SC_PROCS 32
718 char *num_sc_threads
= getenv("RADV_SECURE_COMPILE_THREADS");
720 instance
->num_sc_threads
= MIN2(strtoul(num_sc_threads
, NULL
, 10), MAX_SC_PROCS
);
722 instance
->debug_flags
= parse_debug_string(getenv("RADV_DEBUG"),
725 /* Disable memory cache when secure compile is set */
726 if (radv_device_use_secure_compile(instance
))
727 instance
->debug_flags
|= RADV_DEBUG_NO_MEMORY_CACHE
;
729 instance
->perftest_flags
= parse_debug_string(getenv("RADV_PERFTEST"),
730 radv_perftest_options
);
732 if (instance
->perftest_flags
& RADV_PERFTEST_ACO
)
733 fprintf(stderr
, "WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely\n");
735 if (instance
->debug_flags
& RADV_DEBUG_STARTUP
)
736 radv_logi("Created an instance");
738 for (uint32_t i
= 0; i
< pCreateInfo
->enabledExtensionCount
; i
++) {
739 const char *ext_name
= pCreateInfo
->ppEnabledExtensionNames
[i
];
740 int index
= radv_get_instance_extension_index(ext_name
);
742 if (index
< 0 || !radv_supported_instance_extensions
.extensions
[index
]) {
743 vk_free2(&default_alloc
, pAllocator
, instance
);
744 return vk_error(instance
, VK_ERROR_EXTENSION_NOT_PRESENT
);
747 instance
->enabled_extensions
.extensions
[index
] = true;
750 result
= vk_debug_report_instance_init(&instance
->debug_report_callbacks
);
751 if (result
!= VK_SUCCESS
) {
752 vk_free2(&default_alloc
, pAllocator
, instance
);
753 return vk_error(instance
, result
);
756 instance
->engineName
= vk_strdup(&instance
->alloc
, engine_name
,
757 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE
);
758 instance
->engineVersion
= engine_version
;
760 glsl_type_singleton_init_or_ref();
762 VG(VALGRIND_CREATE_MEMPOOL(instance
, 0, false));
764 radv_init_dri_options(instance
);
765 radv_handle_per_app_options(instance
, pCreateInfo
->pApplicationInfo
);
767 *pInstance
= radv_instance_to_handle(instance
);
772 void radv_DestroyInstance(
773 VkInstance _instance
,
774 const VkAllocationCallbacks
* pAllocator
)
776 RADV_FROM_HANDLE(radv_instance
, instance
, _instance
);
781 for (int i
= 0; i
< instance
->physicalDeviceCount
; ++i
) {
782 radv_physical_device_finish(instance
->physicalDevices
+ i
);
785 vk_free(&instance
->alloc
, instance
->engineName
);
787 VG(VALGRIND_DESTROY_MEMPOOL(instance
));
789 glsl_type_singleton_decref();
791 driDestroyOptionCache(&instance
->dri_options
);
792 driDestroyOptionInfo(&instance
->available_dri_options
);
794 vk_debug_report_instance_destroy(&instance
->debug_report_callbacks
);
796 vk_free(&instance
->alloc
, instance
);
800 radv_enumerate_devices(struct radv_instance
*instance
)
802 /* TODO: Check for more devices ? */
803 drmDevicePtr devices
[8];
804 VkResult result
= VK_ERROR_INCOMPATIBLE_DRIVER
;
807 instance
->physicalDeviceCount
= 0;
809 max_devices
= drmGetDevices2(0, devices
, ARRAY_SIZE(devices
));
811 if (instance
->debug_flags
& RADV_DEBUG_STARTUP
)
812 radv_logi("Found %d drm nodes", max_devices
);
815 return vk_error(instance
, VK_ERROR_INCOMPATIBLE_DRIVER
);
817 for (unsigned i
= 0; i
< (unsigned)max_devices
; i
++) {
818 if (devices
[i
]->available_nodes
& 1 << DRM_NODE_RENDER
&&
819 devices
[i
]->bustype
== DRM_BUS_PCI
&&
820 devices
[i
]->deviceinfo
.pci
->vendor_id
== ATI_VENDOR_ID
) {
822 result
= radv_physical_device_init(instance
->physicalDevices
+
823 instance
->physicalDeviceCount
,
826 if (result
== VK_SUCCESS
)
827 ++instance
->physicalDeviceCount
;
828 else if (result
!= VK_ERROR_INCOMPATIBLE_DRIVER
)
832 drmFreeDevices(devices
, max_devices
);
837 VkResult
radv_EnumeratePhysicalDevices(
838 VkInstance _instance
,
839 uint32_t* pPhysicalDeviceCount
,
840 VkPhysicalDevice
* pPhysicalDevices
)
842 RADV_FROM_HANDLE(radv_instance
, instance
, _instance
);
845 if (instance
->physicalDeviceCount
< 0) {
846 result
= radv_enumerate_devices(instance
);
847 if (result
!= VK_SUCCESS
&&
848 result
!= VK_ERROR_INCOMPATIBLE_DRIVER
)
852 if (!pPhysicalDevices
) {
853 *pPhysicalDeviceCount
= instance
->physicalDeviceCount
;
855 *pPhysicalDeviceCount
= MIN2(*pPhysicalDeviceCount
, instance
->physicalDeviceCount
);
856 for (unsigned i
= 0; i
< *pPhysicalDeviceCount
; ++i
)
857 pPhysicalDevices
[i
] = radv_physical_device_to_handle(instance
->physicalDevices
+ i
);
860 return *pPhysicalDeviceCount
< instance
->physicalDeviceCount
? VK_INCOMPLETE
864 VkResult
radv_EnumeratePhysicalDeviceGroups(
865 VkInstance _instance
,
866 uint32_t* pPhysicalDeviceGroupCount
,
867 VkPhysicalDeviceGroupProperties
* pPhysicalDeviceGroupProperties
)
869 RADV_FROM_HANDLE(radv_instance
, instance
, _instance
);
872 if (instance
->physicalDeviceCount
< 0) {
873 result
= radv_enumerate_devices(instance
);
874 if (result
!= VK_SUCCESS
&&
875 result
!= VK_ERROR_INCOMPATIBLE_DRIVER
)
879 if (!pPhysicalDeviceGroupProperties
) {
880 *pPhysicalDeviceGroupCount
= instance
->physicalDeviceCount
;
882 *pPhysicalDeviceGroupCount
= MIN2(*pPhysicalDeviceGroupCount
, instance
->physicalDeviceCount
);
883 for (unsigned i
= 0; i
< *pPhysicalDeviceGroupCount
; ++i
) {
884 pPhysicalDeviceGroupProperties
[i
].physicalDeviceCount
= 1;
885 pPhysicalDeviceGroupProperties
[i
].physicalDevices
[0] = radv_physical_device_to_handle(instance
->physicalDevices
+ i
);
886 pPhysicalDeviceGroupProperties
[i
].subsetAllocation
= false;
889 return *pPhysicalDeviceGroupCount
< instance
->physicalDeviceCount
? VK_INCOMPLETE
893 void radv_GetPhysicalDeviceFeatures(
894 VkPhysicalDevice physicalDevice
,
895 VkPhysicalDeviceFeatures
* pFeatures
)
897 RADV_FROM_HANDLE(radv_physical_device
, pdevice
, physicalDevice
);
898 memset(pFeatures
, 0, sizeof(*pFeatures
));
900 *pFeatures
= (VkPhysicalDeviceFeatures
) {
901 .robustBufferAccess
= true,
902 .fullDrawIndexUint32
= true,
903 .imageCubeArray
= true,
904 .independentBlend
= true,
905 .geometryShader
= true,
906 .tessellationShader
= true,
907 .sampleRateShading
= true,
908 .dualSrcBlend
= true,
910 .multiDrawIndirect
= true,
911 .drawIndirectFirstInstance
= true,
913 .depthBiasClamp
= true,
914 .fillModeNonSolid
= true,
919 .multiViewport
= true,
920 .samplerAnisotropy
= true,
921 .textureCompressionETC2
= radv_device_supports_etc(pdevice
),
922 .textureCompressionASTC_LDR
= false,
923 .textureCompressionBC
= true,
924 .occlusionQueryPrecise
= true,
925 .pipelineStatisticsQuery
= true,
926 .vertexPipelineStoresAndAtomics
= true,
927 .fragmentStoresAndAtomics
= true,
928 .shaderTessellationAndGeometryPointSize
= true,
929 .shaderImageGatherExtended
= true,
930 .shaderStorageImageExtendedFormats
= true,
931 .shaderStorageImageMultisample
= pdevice
->rad_info
.chip_class
>= GFX8
,
932 .shaderUniformBufferArrayDynamicIndexing
= true,
933 .shaderSampledImageArrayDynamicIndexing
= true,
934 .shaderStorageBufferArrayDynamicIndexing
= true,
935 .shaderStorageImageArrayDynamicIndexing
= true,
936 .shaderStorageImageReadWithoutFormat
= true,
937 .shaderStorageImageWriteWithoutFormat
= true,
938 .shaderClipDistance
= true,
939 .shaderCullDistance
= true,
940 .shaderFloat64
= true,
942 .shaderInt16
= pdevice
->rad_info
.chip_class
>= GFX9
&& !pdevice
->use_aco
,
943 .sparseBinding
= true,
944 .variableMultisampleRate
= true,
945 .inheritedQueries
= true,
949 void radv_GetPhysicalDeviceFeatures2(
950 VkPhysicalDevice physicalDevice
,
951 VkPhysicalDeviceFeatures2
*pFeatures
)
953 RADV_FROM_HANDLE(radv_physical_device
, pdevice
, physicalDevice
);
954 vk_foreach_struct(ext
, pFeatures
->pNext
) {
955 switch (ext
->sType
) {
956 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES
: {
957 VkPhysicalDeviceVariablePointersFeatures
*features
= (void *)ext
;
958 features
->variablePointersStorageBuffer
= true;
959 features
->variablePointers
= true;
962 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES
: {
963 VkPhysicalDeviceMultiviewFeatures
*features
= (VkPhysicalDeviceMultiviewFeatures
*)ext
;
964 features
->multiview
= true;
965 features
->multiviewGeometryShader
= true;
966 features
->multiviewTessellationShader
= true;
969 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES
: {
970 VkPhysicalDeviceShaderDrawParametersFeatures
*features
=
971 (VkPhysicalDeviceShaderDrawParametersFeatures
*)ext
;
972 features
->shaderDrawParameters
= true;
975 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES
: {
976 VkPhysicalDeviceProtectedMemoryFeatures
*features
=
977 (VkPhysicalDeviceProtectedMemoryFeatures
*)ext
;
978 features
->protectedMemory
= false;
981 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES
: {
982 VkPhysicalDevice16BitStorageFeatures
*features
=
983 (VkPhysicalDevice16BitStorageFeatures
*)ext
;
984 bool enabled
= pdevice
->rad_info
.chip_class
>= GFX8
&& !pdevice
->use_aco
;
985 features
->storageBuffer16BitAccess
= enabled
;
986 features
->uniformAndStorageBuffer16BitAccess
= enabled
;
987 features
->storagePushConstant16
= enabled
;
988 features
->storageInputOutput16
= enabled
&& LLVM_VERSION_MAJOR
>= 9;
991 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES
: {
992 VkPhysicalDeviceSamplerYcbcrConversionFeatures
*features
=
993 (VkPhysicalDeviceSamplerYcbcrConversionFeatures
*)ext
;
994 features
->samplerYcbcrConversion
= true;
997 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT
: {
998 VkPhysicalDeviceDescriptorIndexingFeaturesEXT
*features
=
999 (VkPhysicalDeviceDescriptorIndexingFeaturesEXT
*)ext
;
1000 features
->shaderInputAttachmentArrayDynamicIndexing
= true;
1001 features
->shaderUniformTexelBufferArrayDynamicIndexing
= true;
1002 features
->shaderStorageTexelBufferArrayDynamicIndexing
= true;
1003 features
->shaderUniformBufferArrayNonUniformIndexing
= true;
1004 features
->shaderSampledImageArrayNonUniformIndexing
= true;
1005 features
->shaderStorageBufferArrayNonUniformIndexing
= true;
1006 features
->shaderStorageImageArrayNonUniformIndexing
= true;
1007 features
->shaderInputAttachmentArrayNonUniformIndexing
= true;
1008 features
->shaderUniformTexelBufferArrayNonUniformIndexing
= true;
1009 features
->shaderStorageTexelBufferArrayNonUniformIndexing
= true;
1010 features
->descriptorBindingUniformBufferUpdateAfterBind
= true;
1011 features
->descriptorBindingSampledImageUpdateAfterBind
= true;
1012 features
->descriptorBindingStorageImageUpdateAfterBind
= true;
1013 features
->descriptorBindingStorageBufferUpdateAfterBind
= true;
1014 features
->descriptorBindingUniformTexelBufferUpdateAfterBind
= true;
1015 features
->descriptorBindingStorageTexelBufferUpdateAfterBind
= true;
1016 features
->descriptorBindingUpdateUnusedWhilePending
= true;
1017 features
->descriptorBindingPartiallyBound
= true;
1018 features
->descriptorBindingVariableDescriptorCount
= true;
1019 features
->runtimeDescriptorArray
= true;
1022 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT
: {
1023 VkPhysicalDeviceConditionalRenderingFeaturesEXT
*features
=
1024 (VkPhysicalDeviceConditionalRenderingFeaturesEXT
*)ext
;
1025 features
->conditionalRendering
= true;
1026 features
->inheritedConditionalRendering
= false;
1029 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT
: {
1030 VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT
*features
=
1031 (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT
*)ext
;
1032 features
->vertexAttributeInstanceRateDivisor
= true;
1033 features
->vertexAttributeInstanceRateZeroDivisor
= true;
1036 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT
: {
1037 VkPhysicalDeviceTransformFeedbackFeaturesEXT
*features
=
1038 (VkPhysicalDeviceTransformFeedbackFeaturesEXT
*)ext
;
1039 features
->transformFeedback
= true;
1040 features
->geometryStreams
= !pdevice
->use_ngg_streamout
;
1043 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT
: {
1044 VkPhysicalDeviceScalarBlockLayoutFeaturesEXT
*features
=
1045 (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT
*)ext
;
1046 features
->scalarBlockLayout
= pdevice
->rad_info
.chip_class
>= GFX7
;
1049 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT
: {
1050 VkPhysicalDeviceMemoryPriorityFeaturesEXT
*features
=
1051 (VkPhysicalDeviceMemoryPriorityFeaturesEXT
*)ext
;
1052 features
->memoryPriority
= true;
1055 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT
: {
1056 VkPhysicalDeviceBufferDeviceAddressFeaturesEXT
*features
=
1057 (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT
*)ext
;
1058 features
->bufferDeviceAddress
= true;
1059 features
->bufferDeviceAddressCaptureReplay
= false;
1060 features
->bufferDeviceAddressMultiDevice
= false;
1063 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR
: {
1064 VkPhysicalDeviceBufferDeviceAddressFeaturesKHR
*features
=
1065 (VkPhysicalDeviceBufferDeviceAddressFeaturesKHR
*)ext
;
1066 features
->bufferDeviceAddress
= true;
1067 features
->bufferDeviceAddressCaptureReplay
= false;
1068 features
->bufferDeviceAddressMultiDevice
= false;
1071 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT
: {
1072 VkPhysicalDeviceDepthClipEnableFeaturesEXT
*features
=
1073 (VkPhysicalDeviceDepthClipEnableFeaturesEXT
*)ext
;
1074 features
->depthClipEnable
= true;
1077 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT
: {
1078 VkPhysicalDeviceHostQueryResetFeaturesEXT
*features
=
1079 (VkPhysicalDeviceHostQueryResetFeaturesEXT
*)ext
;
1080 features
->hostQueryReset
= true;
1083 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR
: {
1084 VkPhysicalDevice8BitStorageFeaturesKHR
*features
=
1085 (VkPhysicalDevice8BitStorageFeaturesKHR
*)ext
;
1086 bool enabled
= pdevice
->rad_info
.chip_class
>= GFX8
&& !pdevice
->use_aco
;
1087 features
->storageBuffer8BitAccess
= enabled
;
1088 features
->uniformAndStorageBuffer8BitAccess
= enabled
;
1089 features
->storagePushConstant8
= enabled
;
1092 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR
: {
1093 VkPhysicalDeviceShaderFloat16Int8FeaturesKHR
*features
=
1094 (VkPhysicalDeviceShaderFloat16Int8FeaturesKHR
*)ext
;
1095 features
->shaderFloat16
= pdevice
->rad_info
.chip_class
>= GFX8
&& !pdevice
->use_aco
;
1096 features
->shaderInt8
= !pdevice
->use_aco
;
1099 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR
: {
1100 VkPhysicalDeviceShaderAtomicInt64FeaturesKHR
*features
=
1101 (VkPhysicalDeviceShaderAtomicInt64FeaturesKHR
*)ext
;
1102 features
->shaderBufferInt64Atomics
= LLVM_VERSION_MAJOR
>= 9;
1103 features
->shaderSharedInt64Atomics
= LLVM_VERSION_MAJOR
>= 9;
1106 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT
: {
1107 VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT
*features
=
1108 (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT
*)ext
;
1109 features
->shaderDemoteToHelperInvocation
= pdevice
->use_aco
;
1112 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT
: {
1113 VkPhysicalDeviceInlineUniformBlockFeaturesEXT
*features
=
1114 (VkPhysicalDeviceInlineUniformBlockFeaturesEXT
*)ext
;
1116 features
->inlineUniformBlock
= true;
1117 features
->descriptorBindingInlineUniformBlockUpdateAfterBind
= true;
1120 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV
: {
1121 VkPhysicalDeviceComputeShaderDerivativesFeaturesNV
*features
=
1122 (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV
*)ext
;
1123 features
->computeDerivativeGroupQuads
= false;
1124 features
->computeDerivativeGroupLinear
= true;
1127 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT
: {
1128 VkPhysicalDeviceYcbcrImageArraysFeaturesEXT
*features
=
1129 (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT
*)ext
;
1130 features
->ycbcrImageArrays
= true;
1133 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR
: {
1134 VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR
*features
=
1135 (VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR
*)ext
;
1136 features
->uniformBufferStandardLayout
= true;
1139 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT
: {
1140 VkPhysicalDeviceIndexTypeUint8FeaturesEXT
*features
=
1141 (VkPhysicalDeviceIndexTypeUint8FeaturesEXT
*)ext
;
1142 features
->indexTypeUint8
= pdevice
->rad_info
.chip_class
>= GFX8
;
1145 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR
: {
1146 VkPhysicalDeviceImagelessFramebufferFeaturesKHR
*features
=
1147 (VkPhysicalDeviceImagelessFramebufferFeaturesKHR
*)ext
;
1148 features
->imagelessFramebuffer
= true;
1151 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR
: {
1152 VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR
*features
=
1153 (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR
*)ext
;
1154 features
->pipelineExecutableInfo
= true;
1157 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR
: {
1158 VkPhysicalDeviceShaderClockFeaturesKHR
*features
=
1159 (VkPhysicalDeviceShaderClockFeaturesKHR
*)ext
;
1160 features
->shaderSubgroupClock
= true;
1161 features
->shaderDeviceClock
= false;
1164 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT
: {
1165 VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT
*features
=
1166 (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT
*)ext
;
1167 features
->texelBufferAlignment
= true;
1170 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR
: {
1171 VkPhysicalDeviceTimelineSemaphoreFeaturesKHR
*features
=
1172 (VkPhysicalDeviceTimelineSemaphoreFeaturesKHR
*) ext
;
1173 features
->timelineSemaphore
= true;
1176 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT
: {
1177 VkPhysicalDeviceSubgroupSizeControlFeaturesEXT
*features
=
1178 (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT
*)ext
;
1179 features
->subgroupSizeControl
= true;
1180 features
->computeFullSubgroups
= true;
1183 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD
: {
1184 VkPhysicalDeviceCoherentMemoryFeaturesAMD
*features
=
1185 (VkPhysicalDeviceCoherentMemoryFeaturesAMD
*)ext
;
1186 features
->deviceCoherentMemory
= pdevice
->rad_info
.has_l2_uncached
;
1189 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES_KHR
: {
1190 VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR
*features
=
1191 (VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR
*)ext
;
1192 features
->shaderSubgroupExtendedTypes
= true;
1195 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR
: {
1196 VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR
*features
=
1197 (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR
*)ext
;
1198 features
->separateDepthStencilLayouts
= true;
1205 return radv_GetPhysicalDeviceFeatures(physicalDevice
, &pFeatures
->features
);
1209 radv_max_descriptor_set_size()
1211 /* make sure that the entire descriptor set is addressable with a signed
1212 * 32-bit int. So the sum of all limits scaled by descriptor size has to
1213 * be at most 2 GiB. the combined image & samples object count as one of
1214 * both. This limit is for the pipeline layout, not for the set layout, but
1215 * there is no set limit, so we just set a pipeline limit. I don't think
1216 * any app is going to hit this soon. */
1217 return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
1218 - MAX_INLINE_UNIFORM_BLOCK_SIZE
* MAX_INLINE_UNIFORM_BLOCK_COUNT
) /
1219 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1220 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1221 32 /* sampler, largest when combined with image */ +
1222 64 /* sampled image */ +
1223 64 /* storage image */);
/* Vulkan 1.0 entry point: fills *pProperties with the core device properties
 * and the full VkPhysicalDeviceLimits table for this physical device.
 *
 * Most limits are fixed constants chosen for AMD hardware; the descriptor
 * limits all derive from radv_max_descriptor_set_size().
 *
 * NOTE(review): the source this was recovered from had dropped some physical
 * lines (closing braces, a few initializer lines). Lines marked
 * "reconstructed" below were restored from upstream Mesa — confirm against
 * the original file.
 */
void radv_GetPhysicalDeviceProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties*                 pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    /* 0xf = sample counts 1, 2, 4 and 8 are all supported. */
    VkSampleCountFlags sample_counts = 0xf;

    size_t max_descriptor_set_size = radv_max_descriptor_set_size();

    VkPhysicalDeviceLimits limits = {
        .maxImageDimension1D                      = (1 << 14),
        .maxImageDimension2D                      = (1 << 14),
        .maxImageDimension3D                      = (1 << 11),
        .maxImageDimensionCube                    = (1 << 14),
        .maxImageArrayLayers                      = (1 << 11),
        .maxTexelBufferElements                   = 128 * 1024 * 1024,
        .maxUniformBufferRange                    = UINT32_MAX,
        .maxStorageBufferRange                    = UINT32_MAX,
        .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
        .maxMemoryAllocationCount                 = UINT32_MAX,
        .maxSamplerAllocationCount                = 64 * 1024,
        .bufferImageGranularity                   = 64, /* A cache line */
        .sparseAddressSpaceSize                   = 0xffffffffu, /* buffer max size */
        .maxBoundDescriptorSets                   = MAX_SETS,
        .maxPerStageDescriptorSamplers            = max_descriptor_set_size,
        .maxPerStageDescriptorUniformBuffers      = max_descriptor_set_size,
        .maxPerStageDescriptorStorageBuffers      = max_descriptor_set_size,
        .maxPerStageDescriptorSampledImages       = max_descriptor_set_size,
        .maxPerStageDescriptorStorageImages       = max_descriptor_set_size,
        .maxPerStageDescriptorInputAttachments    = max_descriptor_set_size,
        .maxPerStageResources                     = max_descriptor_set_size,
        .maxDescriptorSetSamplers                 = max_descriptor_set_size,
        .maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
        .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
        .maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
        .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
        .maxDescriptorSetSampledImages            = max_descriptor_set_size,
        .maxDescriptorSetStorageImages            = max_descriptor_set_size,
        .maxDescriptorSetInputAttachments         = max_descriptor_set_size,
        .maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
        .maxVertexInputBindings                   = MAX_VBS,
        .maxVertexInputAttributeOffset            = 2047,
        .maxVertexInputBindingStride              = 2048,
        .maxVertexOutputComponents                = 128,
        .maxTessellationGenerationLevel           = 64,
        .maxTessellationPatchSize                 = 32,
        .maxTessellationControlPerVertexInputComponents = 128,
        .maxTessellationControlPerVertexOutputComponents = 128,
        .maxTessellationControlPerPatchOutputComponents = 120,
        .maxTessellationControlTotalOutputComponents = 4096,
        .maxTessellationEvaluationInputComponents = 128,
        .maxTessellationEvaluationOutputComponents = 128,
        .maxGeometryShaderInvocations             = 127,
        .maxGeometryInputComponents               = 64,
        .maxGeometryOutputComponents              = 128,
        .maxGeometryOutputVertices                = 256,
        .maxGeometryTotalOutputComponents         = 1024,
        .maxFragmentInputComponents               = 128,
        .maxFragmentOutputAttachments             = 8,
        .maxFragmentDualSrcAttachments            = 1,
        .maxFragmentCombinedOutputResources       = 8,
        .maxComputeSharedMemorySize               = 32768,
        .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
        .maxComputeWorkGroupInvocations           = 1024,
        .maxComputeWorkGroupSize                  = {
            /* NOTE(review): these three values were lost in extraction;
             * reconstructed from upstream Mesa — confirm. */
            2048,
            2048,
            2048
        },
        .subPixelPrecisionBits                    = 8,
        .subTexelPrecisionBits                    = 8,
        .mipmapPrecisionBits                      = 8,
        .maxDrawIndexedIndexValue                 = UINT32_MAX,
        .maxDrawIndirectCount                     = UINT32_MAX,
        .maxSamplerLodBias                        = 16,
        .maxSamplerAnisotropy                     = 16,
        .maxViewports                             = MAX_VIEWPORTS,
        .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
        .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
        .viewportSubPixelBits                     = 8,
        .minMemoryMapAlignment                    = 4096, /* A page */
        .minTexelBufferOffsetAlignment            = 4,
        .minUniformBufferOffsetAlignment          = 4,
        .minStorageBufferOffsetAlignment          = 4,
        .minTexelOffset                           = -32,
        .maxTexelOffset                           = 31,
        .minTexelGatherOffset                     = -32,
        .maxTexelGatherOffset                     = 31,
        .minInterpolationOffset                   = -2,
        .maxInterpolationOffset                   = 2,
        .subPixelInterpolationOffsetBits          = 8,
        .maxFramebufferWidth                      = (1 << 14),
        .maxFramebufferHeight                     = (1 << 14),
        .maxFramebufferLayers                     = (1 << 10),
        .framebufferColorSampleCounts             = sample_counts,
        .framebufferDepthSampleCounts             = sample_counts,
        .framebufferStencilSampleCounts           = sample_counts,
        .framebufferNoAttachmentsSampleCounts     = sample_counts,
        .maxColorAttachments                      = MAX_RTS,
        .sampledImageColorSampleCounts            = sample_counts,
        .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
        .sampledImageDepthSampleCounts            = sample_counts,
        .sampledImageStencilSampleCounts          = sample_counts,
        /* Multisampled storage images only on GFX8+ per the condition below. */
        .storageImageSampleCounts                 = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
        .maxSampleMaskWords                       = 1,
        .timestampComputeAndGraphics              = true,
        /* Convert the crystal clock frequency into a ns-per-tick period. */
        .timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
        .maxClipDistances                         = 8,
        .maxCullDistances                         = 8,
        .maxCombinedClipAndCullDistances          = 8,
        .discreteQueuePriorities                  = 2,
        .pointSizeRange                           = { 0.0, 8192.0 },
        .lineWidthRange                           = { 0.0, 7.9921875 },
        .pointSizeGranularity                     = (1.0 / 8.0),
        .lineWidthGranularity                     = (1.0 / 128.0),
        .strictLines                              = false, /* FINISHME */
        .standardSampleLocations                  = true,
        .optimalBufferCopyOffsetAlignment         = 128,
        .optimalBufferCopyRowPitchAlignment       = 128,
        .nonCoherentAtomSize                      = 64,
    };

    *pProperties = (VkPhysicalDeviceProperties) {
        .apiVersion = radv_physical_device_api_version(pdevice),
        .driverVersion = vk_get_driver_version(),
        .vendorID = ATI_VENDOR_ID,
        .deviceID = pdevice->rad_info.pci_id,
        .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
        .limits = limits, /* NOTE(review): line lost in extraction; reconstructed — confirm */
        .sparseProperties = {0},
    };

    strcpy(pProperties->deviceName, pdevice->name);
    memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}
/* Vulkan 1.1 entry point: fills the core properties, then walks the
 * pProperties->pNext chain and fills every extension property struct whose
 * sType is recognized. Unrecognized structs are left untouched, as the
 * Vulkan spec requires.
 *
 * NOTE(review): the source this was recovered from had dropped structural
 * lines (break statements, closing braces, a handful of value lines). Those
 * were restored mechanically; spots where actual values had to be
 * reconstructed from upstream Mesa are marked individually — confirm them.
 */
void radv_GetPhysicalDeviceProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties2                 *pProperties)
{
    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
    /* Core 1.0 properties first; extension structs follow. */
    radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

    vk_foreach_struct(ext, pProperties->pNext) {
        switch (ext->sType) {
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
            VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
                (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
            properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
            VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext;
            memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
            memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
            /* Device LUIDs are not reported on this platform. */
            properties->deviceLUIDValid = false;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
            VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties *)ext;
            properties->maxMultiviewViewCount = MAX_VIEWS;
            properties->maxMultiviewInstanceIndex = INT_MAX;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
            VkPhysicalDevicePointClippingProperties *properties =
                (VkPhysicalDevicePointClippingProperties *)ext;
            properties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
            VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
                (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
            properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
            VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
                (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
            /* Imported host pointers must be page-aligned. */
            properties->minImportedHostPointerAlignment = 4096;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
            VkPhysicalDeviceSubgroupProperties *properties =
                (VkPhysicalDeviceSubgroupProperties *)ext;
            properties->subgroupSize = RADV_SUBGROUP_SIZE;
            properties->supportedStages = VK_SHADER_STAGE_ALL;
            properties->supportedOperations =
                VK_SUBGROUP_FEATURE_BASIC_BIT |
                VK_SUBGROUP_FEATURE_VOTE_BIT |
                VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                VK_SUBGROUP_FEATURE_BALLOT_BIT |
                VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                VK_SUBGROUP_FEATURE_QUAD_BIT;
            /* Shuffle operations are only advertised on GFX8/GFX9 here. */
            if (pdevice->rad_info.chip_class == GFX8 ||
                pdevice->rad_info.chip_class == GFX9) {
                properties->supportedOperations |=
                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
            }
            properties->quadOperationsInAllStages = true;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
            VkPhysicalDeviceMaintenance3Properties *properties =
                (VkPhysicalDeviceMaintenance3Properties *)ext;
            properties->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
            properties->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
            VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
                (VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
            /* GFX6-8 only support single channel min/max filter. */
            properties->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
            properties->filterMinmaxSingleComponentFormats = true;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
            VkPhysicalDeviceShaderCorePropertiesAMD *properties =
                (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

            /* Shader engines. */
            properties->shaderEngineCount =
                pdevice->rad_info.max_se;
            properties->shaderArraysPerEngineCount =
                pdevice->rad_info.max_sh_per_se;
            properties->computeUnitsPerShaderArray =
                pdevice->rad_info.num_good_cu_per_sh;
            properties->simdPerComputeUnit = 4;
            /* These GFX8 parts run 8 waves per SIMD; everything else 10. */
            properties->wavefrontsPerSimd =
                pdevice->rad_info.family == CHIP_TONGA ||
                pdevice->rad_info.family == CHIP_ICELAND ||
                pdevice->rad_info.family == CHIP_POLARIS10 ||
                pdevice->rad_info.family == CHIP_POLARIS11 ||
                pdevice->rad_info.family == CHIP_POLARIS12 ||
                pdevice->rad_info.family == CHIP_VEGAM ? 8 : 10;
            properties->wavefrontSize = 64;

            /* SGPR. */
            properties->sgprsPerSimd =
                pdevice->rad_info.num_physical_sgprs_per_simd;
            properties->minSgprAllocation =
                pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
            properties->maxSgprAllocation =
                pdevice->rad_info.family == CHIP_TONGA ||
                pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
            properties->sgprAllocationGranularity =
                pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;

            /* VGPR. */
            properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
            properties->minVgprAllocation = 4;
            properties->maxVgprAllocation = 256;
            properties->vgprAllocationGranularity = 4;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
            VkPhysicalDeviceShaderCoreProperties2AMD *properties =
                (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

            properties->shaderCoreFeatures = 0;
            properties->activeComputeUnitCount =
                pdevice->rad_info.num_good_compute_units;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
            VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
                (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
            properties->maxVertexAttribDivisor = UINT32_MAX;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
            VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
                (VkPhysicalDeviceDescriptorIndexingPropertiesEXT *)ext;
            properties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
            /* Non-uniform indexing is implemented with waterfall loops, so
             * none of these are "native". */
            properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
            properties->shaderSampledImageArrayNonUniformIndexingNative = false;
            properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
            properties->shaderStorageImageArrayNonUniformIndexingNative = false;
            properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
            properties->robustBufferAccessUpdateAfterBind = false;
            properties->quadDivergentImplicitLod = false;

            size_t max_descriptor_set_size = radv_max_descriptor_set_size();
            properties->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
            properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
            properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
            properties->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
            properties->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
            properties->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
            properties->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
            properties->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
            properties->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
            properties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
            properties->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
            properties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
            properties->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
            properties->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
            properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
            VkPhysicalDeviceProtectedMemoryProperties *properties =
                (VkPhysicalDeviceProtectedMemoryProperties *)ext;
            properties->protectedNoFault = false;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
            VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
                (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
            properties->primitiveOverestimationSize = 0;
            properties->maxExtraPrimitiveOverestimationSize = 0;
            properties->extraPrimitiveOverestimationSizeGranularity = 0;
            properties->primitiveUnderestimation = false;
            properties->conservativePointAndLineRasterization = false;
            properties->degenerateTrianglesRasterized = false;
            properties->degenerateLinesRasterized = false;
            properties->fullyCoveredFragmentShaderInputVariable = false;
            properties->conservativeRasterizationPostDepthCoverage = false;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
            VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
                (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
            properties->pciDomain = pdevice->bus_info.domain;
            properties->pciBus = pdevice->bus_info.bus;
            properties->pciDevice = pdevice->bus_info.dev;
            properties->pciFunction = pdevice->bus_info.func;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
            VkPhysicalDeviceDriverPropertiesKHR *driver_props =
                (VkPhysicalDeviceDriverPropertiesKHR *) ext;

            driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
            snprintf(driver_props->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR, "radv");
            snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
                     "Mesa " PACKAGE_VERSION MESA_GIT_SHA1
                     " (LLVM " MESA_LLVM_VERSION_STRING ")");

            driver_props->conformanceVersion = (VkConformanceVersionKHR) {
                /* NOTE(review): field values lost in extraction;
                 * reconstructed from upstream Mesa of this era — confirm. */
                .major = 1,
                .minor = 1,
                .subminor = 2,
                .patch = 0,
            };
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
            VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
                (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
            properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
            properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
            properties->maxTransformFeedbackBufferSize = UINT32_MAX;
            properties->maxTransformFeedbackStreamDataSize = 512;
            properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
            properties->maxTransformFeedbackBufferDataStride = 512;
            /* Queries and multi-stream output are unavailable with the NGG
             * streamout path. */
            properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
            properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
            properties->transformFeedbackRasterizationStreamSelect = false;
            properties->transformFeedbackDraw = true;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
            VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
                (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

            props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
            props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
            props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
            props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
            props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
            VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
                (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
            properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
                                                     VK_SAMPLE_COUNT_4_BIT |
                                                     VK_SAMPLE_COUNT_8_BIT;
            properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
            properties->sampleLocationCoordinateRange[0] = 0.0f;
            properties->sampleLocationCoordinateRange[1] = 0.9375f;
            properties->sampleLocationSubPixelBits = 4;
            properties->variableSampleLocations = false;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
            VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
                (VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;

            /* We support all of the depth resolve modes */
            properties->supportedDepthResolveModes =
                VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
                VK_RESOLVE_MODE_MIN_BIT_KHR |
                VK_RESOLVE_MODE_MAX_BIT_KHR;

            /* Average doesn't make sense for stencil so we don't support that */
            properties->supportedStencilResolveModes =
                VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                VK_RESOLVE_MODE_MIN_BIT_KHR |
                VK_RESOLVE_MODE_MAX_BIT_KHR;

            properties->independentResolveNone = true;
            properties->independentResolve = true;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
            VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
                (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
            properties->storageTexelBufferOffsetAlignmentBytes = 4;
            properties->storageTexelBufferOffsetSingleTexelAlignment = true;
            properties->uniformTexelBufferOffsetAlignmentBytes = 4;
            properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR: {
            VkPhysicalDeviceFloatControlsPropertiesKHR *properties =
                (VkPhysicalDeviceFloatControlsPropertiesKHR *)ext;

            /* On AMD hardware, denormals and rounding modes for
             * fp16/fp64 are controlled by the same config
             * register.
             */
            properties->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
            properties->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;

            /* Do not allow both preserving and flushing denorms
             * because different shaders in the same pipeline can
             * have different settings and this won't work for
             * merged shaders. To make it work, this requires LLVM
             * support for changing the register. The same logic
             * applies for the rounding modes because they are
             * configured with the same config register.
             * TODO: we can enable a lot of these for ACO when it
             * supports all stages.
             */
            properties->shaderDenormFlushToZeroFloat32 = true;
            properties->shaderDenormPreserveFloat32 = false;
            properties->shaderRoundingModeRTEFloat32 = true;
            properties->shaderRoundingModeRTZFloat32 = false;
            properties->shaderSignedZeroInfNanPreserveFloat32 = true;

            properties->shaderDenormFlushToZeroFloat16 = false;
            properties->shaderDenormPreserveFloat16 = pdevice->rad_info.chip_class >= GFX8;
            properties->shaderRoundingModeRTEFloat16 = pdevice->rad_info.chip_class >= GFX8;
            properties->shaderRoundingModeRTZFloat16 = false;
            properties->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.chip_class >= GFX8;

            properties->shaderDenormFlushToZeroFloat64 = false;
            properties->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
            properties->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
            properties->shaderRoundingModeRTZFloat64 = false;
            properties->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR: {
            VkPhysicalDeviceTimelineSemaphorePropertiesKHR *props =
                (VkPhysicalDeviceTimelineSemaphorePropertiesKHR *) ext;
            props->maxTimelineSemaphoreValueDifference = UINT64_MAX;
            break;
        }
        case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
            VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
                (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
            props->minSubgroupSize = 64;
            props->maxSubgroupSize = 64;
            props->maxComputeWorkgroupSubgroups = UINT32_MAX;
            props->requiredSubgroupSizeStages = 0;

            if (pdevice->rad_info.chip_class >= GFX10) {
                /* Only GFX10+ supports wave32. */
                props->minSubgroupSize = 32;
                props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
            }
            break;
        }
        default:
            break;
        }
    }
}
/* Shared worker for the GetPhysicalDeviceQueueFamilyProperties{,2} entry
 * points.
 *
 * When pQueueFamilyProperties is NULL this is a count-only query: the number
 * of queue families is written to *pCount. Otherwise it fills at most
 * *pCount entries (via the per-entry pointer array) and writes back how many
 * were filled. A second, compute-only family is exposed when the kernel
 * reports compute rings and RADV_DEBUG_NO_COMPUTE_QUEUE is not set.
 *
 * NOTE(review): several lines of this function (the pCount parameter
 * declaration, early-outs, .queueCount, and the tail) were lost in
 * extraction and reconstructed from upstream Mesa — confirm against the
 * original file.
 */
static void radv_get_physical_device_queue_family_properties(
    struct radv_physical_device*                pdevice,
    uint32_t*                                   pCount, /* reconstructed */
    VkQueueFamilyProperties**                   pQueueFamilyProperties)
{
    int num_queue_families = 1;

    if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
        num_queue_families++;

    if (pQueueFamilyProperties == NULL) {
        /* Count-only query. */
        *pCount = num_queue_families;
        return; /* reconstructed */
    }

    /* reconstructed: early-out and index setup */
    if (!*pCount)
        return;

    int idx = 0;
    if (*pCount >= 1) {
        /* Family 0: the universal graphics+compute+transfer queue. */
        *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = 1, /* reconstructed */
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
        };
        idx++; /* reconstructed */
    }

    /* Family 1: async compute, only when compute rings exist and are not
     * disabled for debugging. */
    if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
        if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
                .queueFlags = VK_QUEUE_COMPUTE_BIT |
                              VK_QUEUE_TRANSFER_BIT |
                              VK_QUEUE_SPARSE_BINDING_BIT,
                .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
                .timestampValidBits = 64,
                .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
            };
            idx++; /* reconstructed */
        }
    }
    /* Report how many entries were actually written. */
    *pCount = idx; /* reconstructed */
}
1761 void radv_GetPhysicalDeviceQueueFamilyProperties(
1762 VkPhysicalDevice physicalDevice
,
1764 VkQueueFamilyProperties
* pQueueFamilyProperties
)
1766 RADV_FROM_HANDLE(radv_physical_device
, pdevice
, physicalDevice
);
1767 if (!pQueueFamilyProperties
) {
1768 radv_get_physical_device_queue_family_properties(pdevice
, pCount
, NULL
);
1771 VkQueueFamilyProperties
*properties
[] = {
1772 pQueueFamilyProperties
+ 0,
1773 pQueueFamilyProperties
+ 1,
1774 pQueueFamilyProperties
+ 2,
1776 radv_get_physical_device_queue_family_properties(pdevice
, pCount
, properties
);
1777 assert(*pCount
<= 3);
1780 void radv_GetPhysicalDeviceQueueFamilyProperties2(
1781 VkPhysicalDevice physicalDevice
,
1783 VkQueueFamilyProperties2
*pQueueFamilyProperties
)
1785 RADV_FROM_HANDLE(radv_physical_device
, pdevice
, physicalDevice
);
1786 if (!pQueueFamilyProperties
) {
1787 radv_get_physical_device_queue_family_properties(pdevice
, pCount
, NULL
);
1790 VkQueueFamilyProperties
*properties
[] = {
1791 &pQueueFamilyProperties
[0].queueFamilyProperties
,
1792 &pQueueFamilyProperties
[1].queueFamilyProperties
,
1793 &pQueueFamilyProperties
[2].queueFamilyProperties
,
1795 radv_get_physical_device_queue_family_properties(pdevice
, pCount
, properties
);
1796 assert(*pCount
<= 3);
1799 void radv_GetPhysicalDeviceMemoryProperties(
1800 VkPhysicalDevice physicalDevice
,
1801 VkPhysicalDeviceMemoryProperties
*pMemoryProperties
)
1803 RADV_FROM_HANDLE(radv_physical_device
, physical_device
, physicalDevice
);
1805 *pMemoryProperties
= physical_device
->memory_properties
;
1809 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice
,
1810 VkPhysicalDeviceMemoryBudgetPropertiesEXT
*memoryBudget
)
1812 RADV_FROM_HANDLE(radv_physical_device
, device
, physicalDevice
);
1813 VkPhysicalDeviceMemoryProperties
*memory_properties
= &device
->memory_properties
;
1814 uint64_t visible_vram_size
= radv_get_visible_vram_size(device
);
1815 uint64_t vram_size
= radv_get_vram_size(device
);
1816 uint64_t gtt_size
= device
->rad_info
.gart_size
;
1817 uint64_t heap_budget
, heap_usage
;
1819 /* For all memory heaps, the computation of budget is as follow:
1820 * heap_budget = heap_size - global_heap_usage + app_heap_usage
1822 * The Vulkan spec 1.1.97 says that the budget should include any
1823 * currently allocated device memory.
1825 * Note that the application heap usages are not really accurate (eg.
1826 * in presence of shared buffers).
1828 for (int i
= 0; i
< device
->memory_properties
.memoryTypeCount
; i
++) {
1829 uint32_t heap_index
= device
->memory_properties
.memoryTypes
[i
].heapIndex
;
1831 if (radv_is_mem_type_vram(device
->mem_type_indices
[i
])) {
1832 heap_usage
= device
->ws
->query_value(device
->ws
,
1833 RADEON_ALLOCATED_VRAM
);
1835 heap_budget
= vram_size
-
1836 device
->ws
->query_value(device
->ws
, RADEON_VRAM_USAGE
) +
1839 memoryBudget
->heapBudget
[heap_index
] = heap_budget
;
1840 memoryBudget
->heapUsage
[heap_index
] = heap_usage
;
1841 } else if (radv_is_mem_type_vram_visible(device
->mem_type_indices
[i
])) {
1842 heap_usage
= device
->ws
->query_value(device
->ws
,
1843 RADEON_ALLOCATED_VRAM_VIS
);
1845 heap_budget
= visible_vram_size
-
1846 device
->ws
->query_value(device
->ws
, RADEON_VRAM_VIS_USAGE
) +
1849 memoryBudget
->heapBudget
[heap_index
] = heap_budget
;
1850 memoryBudget
->heapUsage
[heap_index
] = heap_usage
;
1851 } else if (radv_is_mem_type_gtt_wc(device
->mem_type_indices
[i
])) {
1852 heap_usage
= device
->ws
->query_value(device
->ws
,
1853 RADEON_ALLOCATED_GTT
);
1855 heap_budget
= gtt_size
-
1856 device
->ws
->query_value(device
->ws
, RADEON_GTT_USAGE
) +
1859 memoryBudget
->heapBudget
[heap_index
] = heap_budget
;
1860 memoryBudget
->heapUsage
[heap_index
] = heap_usage
;
1864 /* The heapBudget and heapUsage values must be zero for array elements
1865 * greater than or equal to
1866 * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
1868 for (uint32_t i
= memory_properties
->memoryHeapCount
; i
< VK_MAX_MEMORY_HEAPS
; i
++) {
1869 memoryBudget
->heapBudget
[i
] = 0;
1870 memoryBudget
->heapUsage
[i
] = 0;
1874 void radv_GetPhysicalDeviceMemoryProperties2(
1875 VkPhysicalDevice physicalDevice
,
1876 VkPhysicalDeviceMemoryProperties2
*pMemoryProperties
)
1878 radv_GetPhysicalDeviceMemoryProperties(physicalDevice
,
1879 &pMemoryProperties
->memoryProperties
);
1881 VkPhysicalDeviceMemoryBudgetPropertiesEXT
*memory_budget
=
1882 vk_find_struct(pMemoryProperties
->pNext
,
1883 PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT
);
1885 radv_get_memory_budget_properties(physicalDevice
, memory_budget
);
1888 VkResult
radv_GetMemoryHostPointerPropertiesEXT(
1890 VkExternalMemoryHandleTypeFlagBits handleType
,
1891 const void *pHostPointer
,
1892 VkMemoryHostPointerPropertiesEXT
*pMemoryHostPointerProperties
)
1894 RADV_FROM_HANDLE(radv_device
, device
, _device
);
1898 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT
: {
1899 const struct radv_physical_device
*physical_device
= device
->physical_device
;
1900 uint32_t memoryTypeBits
= 0;
1901 for (int i
= 0; i
< physical_device
->memory_properties
.memoryTypeCount
; i
++) {
1902 if (radv_is_mem_type_gtt_cached(physical_device
->mem_type_indices
[i
])) {
1903 memoryTypeBits
= (1 << i
);
1907 pMemoryHostPointerProperties
->memoryTypeBits
= memoryTypeBits
;
1911 return VK_ERROR_INVALID_EXTERNAL_HANDLE
;
1915 static enum radeon_ctx_priority
1916 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT
*pObj
)
1918 /* Default to MEDIUM when a specific global priority isn't requested */
1920 return RADEON_CTX_PRIORITY_MEDIUM
;
1922 switch(pObj
->globalPriority
) {
1923 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT
:
1924 return RADEON_CTX_PRIORITY_REALTIME
;
1925 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT
:
1926 return RADEON_CTX_PRIORITY_HIGH
;
1927 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT
:
1928 return RADEON_CTX_PRIORITY_MEDIUM
;
1929 case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT
:
1930 return RADEON_CTX_PRIORITY_LOW
;
1932 unreachable("Illegal global priority value");
1933 return RADEON_CTX_PRIORITY_INVALID
;
1938 radv_queue_init(struct radv_device
*device
, struct radv_queue
*queue
,
1939 uint32_t queue_family_index
, int idx
,
1940 VkDeviceQueueCreateFlags flags
,
1941 const VkDeviceQueueGlobalPriorityCreateInfoEXT
*global_priority
)
1943 queue
->_loader_data
.loaderMagic
= ICD_LOADER_MAGIC
;
1944 queue
->device
= device
;
1945 queue
->queue_family_index
= queue_family_index
;
1946 queue
->queue_idx
= idx
;
1947 queue
->priority
= radv_get_queue_global_priority(global_priority
);
1948 queue
->flags
= flags
;
1950 queue
->hw_ctx
= device
->ws
->ctx_create(device
->ws
, queue
->priority
);
1952 return vk_error(device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
1954 list_inithead(&queue
->pending_submissions
);
1955 pthread_mutex_init(&queue
->pending_mutex
, NULL
);
1961 radv_queue_finish(struct radv_queue
*queue
)
1963 pthread_mutex_destroy(&queue
->pending_mutex
);
1966 queue
->device
->ws
->ctx_destroy(queue
->hw_ctx
);
1968 if (queue
->initial_full_flush_preamble_cs
)
1969 queue
->device
->ws
->cs_destroy(queue
->initial_full_flush_preamble_cs
);
1970 if (queue
->initial_preamble_cs
)
1971 queue
->device
->ws
->cs_destroy(queue
->initial_preamble_cs
);
1972 if (queue
->continue_preamble_cs
)
1973 queue
->device
->ws
->cs_destroy(queue
->continue_preamble_cs
);
1974 if (queue
->descriptor_bo
)
1975 queue
->device
->ws
->buffer_destroy(queue
->descriptor_bo
);
1976 if (queue
->scratch_bo
)
1977 queue
->device
->ws
->buffer_destroy(queue
->scratch_bo
);
1978 if (queue
->esgs_ring_bo
)
1979 queue
->device
->ws
->buffer_destroy(queue
->esgs_ring_bo
);
1980 if (queue
->gsvs_ring_bo
)
1981 queue
->device
->ws
->buffer_destroy(queue
->gsvs_ring_bo
);
1982 if (queue
->tess_rings_bo
)
1983 queue
->device
->ws
->buffer_destroy(queue
->tess_rings_bo
);
1985 queue
->device
->ws
->buffer_destroy(queue
->gds_bo
);
1986 if (queue
->gds_oa_bo
)
1987 queue
->device
->ws
->buffer_destroy(queue
->gds_oa_bo
);
1988 if (queue
->compute_scratch_bo
)
1989 queue
->device
->ws
->buffer_destroy(queue
->compute_scratch_bo
);
1993 radv_bo_list_init(struct radv_bo_list
*bo_list
)
1995 pthread_mutex_init(&bo_list
->mutex
, NULL
);
1996 bo_list
->list
.count
= bo_list
->capacity
= 0;
1997 bo_list
->list
.bos
= NULL
;
2001 radv_bo_list_finish(struct radv_bo_list
*bo_list
)
2003 free(bo_list
->list
.bos
);
2004 pthread_mutex_destroy(&bo_list
->mutex
);
2007 static VkResult
radv_bo_list_add(struct radv_device
*device
,
2008 struct radeon_winsys_bo
*bo
)
2010 struct radv_bo_list
*bo_list
= &device
->bo_list
;
2015 if (unlikely(!device
->use_global_bo_list
))
2018 pthread_mutex_lock(&bo_list
->mutex
);
2019 if (bo_list
->list
.count
== bo_list
->capacity
) {
2020 unsigned capacity
= MAX2(4, bo_list
->capacity
* 2);
2021 void *data
= realloc(bo_list
->list
.bos
, capacity
* sizeof(struct radeon_winsys_bo
*));
2024 pthread_mutex_unlock(&bo_list
->mutex
);
2025 return VK_ERROR_OUT_OF_HOST_MEMORY
;
2028 bo_list
->list
.bos
= (struct radeon_winsys_bo
**)data
;
2029 bo_list
->capacity
= capacity
;
2032 bo_list
->list
.bos
[bo_list
->list
.count
++] = bo
;
2033 pthread_mutex_unlock(&bo_list
->mutex
);
2037 static void radv_bo_list_remove(struct radv_device
*device
,
2038 struct radeon_winsys_bo
*bo
)
2040 struct radv_bo_list
*bo_list
= &device
->bo_list
;
2045 if (unlikely(!device
->use_global_bo_list
))
2048 pthread_mutex_lock(&bo_list
->mutex
);
2049 for(unsigned i
= 0; i
< bo_list
->list
.count
; ++i
) {
2050 if (bo_list
->list
.bos
[i
] == bo
) {
2051 bo_list
->list
.bos
[i
] = bo_list
->list
.bos
[bo_list
->list
.count
- 1];
2052 --bo_list
->list
.count
;
2056 pthread_mutex_unlock(&bo_list
->mutex
);
2060 radv_device_init_gs_info(struct radv_device
*device
)
2062 device
->gs_table_depth
= ac_get_gs_table_depth(device
->physical_device
->rad_info
.chip_class
,
2063 device
->physical_device
->rad_info
.family
);
2066 static int radv_get_device_extension_index(const char *name
)
2068 for (unsigned i
= 0; i
< RADV_DEVICE_EXTENSION_COUNT
; ++i
) {
2069 if (strcmp(name
, radv_device_extensions
[i
].extensionName
) == 0)
/* Read an integer debug option from the environment.
 * Returns default_value when the variable is unset or does not start with
 * any digits; strtol base 0 accepts decimal, hex (0x...) and octal.
 */
static int
radv_get_int_debug_option(const char *name, int default_value)
{
	const char *str;
	int result;

	str = getenv(name);
	if (!str) {
		result = default_value;
	} else {
		char *endptr;

		result = strtol(str, &endptr, 0);
		if (str == endptr) {
			/* No digits found. */
			result = default_value;
		}
	}

	return result;
}
/* Install a seccomp-BPF filter that whitelists only the syscalls the forked
 * secure-compile process needs (select, exit_group, read, write, brk, futex).
 * Any other syscall fails with EPERM. Returns 0 on success, -1 on failure.
 *
 * NOTE(review): the relative jump offsets (12, 11, 9, ...) must stay in sync
 * with the number of instructions between each check and the final
 * allow/deny statements — verify when editing the filter.
 */
static int install_seccomp_filter(void) {

	struct sock_filter filter[] = {
		/* Check arch is 64bit x86 */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, arch))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 0, 12),

		/* Allow the select() family used by radv_sc_read() */
#if defined __NR__newselect
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR__newselect, 11, 0),
#elif defined __NR_select
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_select, 11, 0),
#else
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_pselect6, 11, 0),
#endif

		/* Allow system exit calls for the forked process */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_exit_group, 9, 0),

		/* Allow system read calls */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_read, 7, 0),

		/* Allow system write calls */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_write, 5, 0),

		/* Allow system brk calls (we need this for malloc) */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_brk, 3, 0),

		/* Futex is required for mutex locks */
		BPF_STMT(BPF_LD + BPF_W + BPF_ABS, (offsetof(struct seccomp_data, nr))),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_futex, 1, 0),

		/* Return error if we hit a system call not on the whitelist */
		BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA)),

		/* Allow whitelisted system calls */
		BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW),
	};

	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
		.filter = filter,
	};

	/* Required so an unprivileged process may install a filter. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return -1;

	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		return -1;

	return 0;
}
/* Helper function with timeout support for reading from the pipe between
 * processes used for secure compile. Blocks until `size` bytes have been
 * read into `buf`, or fails on select/read error. When `timeout` is true
 * each wait is capped at 5 seconds; a timeout counts as failure.
 * Returns true on a complete read, false otherwise.
 */
bool radv_sc_read(int fd, void *buf, size_t size, bool timeout)
{
	fd_set fds;
	struct timeval tv;

	FD_ZERO(&fds);
	FD_SET(fd, &fds);

	/* Byte cursor; avoids non-standard arithmetic on void *. */
	char *p = buf;

	while (true) {
		/* We can't rely on the value of tv after calling select() so
		 * we must reset it on each iteration of the loop.
		 */
		tv.tv_sec = 5;
		tv.tv_usec = 0;

		int rval = select(fd + 1, &fds, NULL, NULL, timeout ? &tv : NULL);

		if (rval == -1) {
			/* select error */
			return false;
		} else if (rval) {
			ssize_t bytes_read = read(fd, p, size);
			if (bytes_read < 0)
				return false;

			p += bytes_read;
			size -= (size_t) bytes_read;
			if (size == 0)
				return true;
		} else {
			/* select timeout */
			return false;
		}
	}
}
/* Close every file descriptor of the current process except those listed in
 * keep_fds (and the /proc/self/fd directory fd itself while iterating).
 * Used to sandbox the forked secure-compile process. Returns false if
 * /proc/self/fd cannot be opened.
 */
static bool radv_close_all_fds(const int *keep_fds, int keep_fd_count)
{
	DIR *d;
	struct dirent *dir;
	d = opendir("/proc/self/fd");
	if (!d)
		return false;
	int dir_fd = dirfd(d);

	while ((dir = readdir(d)) != NULL) {
		if (dir->d_name[0] == '.')
			continue;

		int fd = atoi(dir->d_name);
		/* Don't close the fd backing this directory iteration. */
		if (fd == dir_fd)
			continue;

		bool keep = false;
		for (int i = 0; !keep && i < keep_fd_count; ++i)
			if (keep_fds[i] == fd)
				keep = true;

		if (keep)
			continue;

		close(fd);
	}
	closedir(d);
	return true;
}
2227 static bool secure_compile_open_fifo_fds(struct radv_secure_compile_state
*sc
,
2228 int *fd_server
, int *fd_client
,
2229 unsigned process
, bool make_fifo
)
2231 bool result
= false;
2232 char *fifo_server_path
= NULL
;
2233 char *fifo_client_path
= NULL
;
2235 if (asprintf(&fifo_server_path
, "/tmp/radv_server_%s_%u", sc
->uid
, process
) == -1)
2236 goto open_fifo_exit
;
2238 if (asprintf(&fifo_client_path
, "/tmp/radv_client_%s_%u", sc
->uid
, process
) == -1)
2239 goto open_fifo_exit
;
2242 int file1
= mkfifo(fifo_server_path
, 0666);
2244 goto open_fifo_exit
;
2246 int file2
= mkfifo(fifo_client_path
, 0666);
2248 goto open_fifo_exit
;
2251 *fd_server
= open(fifo_server_path
, O_RDWR
);
2253 goto open_fifo_exit
;
2255 *fd_client
= open(fifo_client_path
, O_RDWR
);
2256 if(*fd_client
< 1) {
2258 goto open_fifo_exit
;
2264 free(fifo_server_path
);
2265 free(fifo_client_path
);
2270 static void run_secure_compile_device(struct radv_device
*device
, unsigned process
,
2271 int fd_idle_device_output
)
2273 int fd_secure_input
;
2274 int fd_secure_output
;
2275 bool fifo_result
= secure_compile_open_fifo_fds(device
->sc_state
,
2280 enum radv_secure_compile_type sc_type
;
2282 const int needed_fds
[] = {
2285 fd_idle_device_output
,
2288 if (!fifo_result
|| !radv_close_all_fds(needed_fds
, ARRAY_SIZE(needed_fds
)) ||
2289 install_seccomp_filter() == -1) {
2290 sc_type
= RADV_SC_TYPE_INIT_FAILURE
;
2292 sc_type
= RADV_SC_TYPE_INIT_SUCCESS
;
2293 device
->sc_state
->secure_compile_processes
[process
].fd_secure_input
= fd_secure_input
;
2294 device
->sc_state
->secure_compile_processes
[process
].fd_secure_output
= fd_secure_output
;
2297 write(fd_idle_device_output
, &sc_type
, sizeof(sc_type
));
2299 if (sc_type
== RADV_SC_TYPE_INIT_FAILURE
)
2300 goto secure_compile_exit
;
2303 radv_sc_read(fd_secure_input
, &sc_type
, sizeof(sc_type
), false);
2305 if (sc_type
== RADV_SC_TYPE_COMPILE_PIPELINE
) {
2306 struct radv_pipeline
*pipeline
;
2307 bool sc_read
= true;
2309 pipeline
= vk_zalloc2(&device
->alloc
, NULL
, sizeof(*pipeline
), 8,
2310 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
2312 pipeline
->device
= device
;
2314 /* Read pipeline layout */
2315 struct radv_pipeline_layout layout
;
2316 sc_read
= radv_sc_read(fd_secure_input
, &layout
, sizeof(struct radv_pipeline_layout
), true);
2317 sc_read
&= radv_sc_read(fd_secure_input
, &layout
.num_sets
, sizeof(uint32_t), true);
2319 goto secure_compile_exit
;
2321 for (uint32_t set
= 0; set
< layout
.num_sets
; set
++) {
2322 uint32_t layout_size
;
2323 sc_read
&= radv_sc_read(fd_secure_input
, &layout_size
, sizeof(uint32_t), true);
2325 goto secure_compile_exit
;
2327 layout
.set
[set
].layout
= malloc(layout_size
);
2328 layout
.set
[set
].layout
->layout_size
= layout_size
;
2329 sc_read
&= radv_sc_read(fd_secure_input
, layout
.set
[set
].layout
,
2330 layout
.set
[set
].layout
->layout_size
, true);
2333 pipeline
->layout
= &layout
;
2335 /* Read pipeline key */
2336 struct radv_pipeline_key key
;
2337 sc_read
&= radv_sc_read(fd_secure_input
, &key
, sizeof(struct radv_pipeline_key
), true);
2339 /* Read pipeline create flags */
2340 VkPipelineCreateFlags flags
;
2341 sc_read
&= radv_sc_read(fd_secure_input
, &flags
, sizeof(VkPipelineCreateFlags
), true);
2343 /* Read stage and shader information */
2344 uint32_t num_stages
;
2345 const VkPipelineShaderStageCreateInfo
*pStages
[MESA_SHADER_STAGES
] = { 0, };
2346 sc_read
&= radv_sc_read(fd_secure_input
, &num_stages
, sizeof(uint32_t), true);
2348 goto secure_compile_exit
;
2350 for (uint32_t i
= 0; i
< num_stages
; i
++) {
2353 gl_shader_stage stage
;
2354 sc_read
&= radv_sc_read(fd_secure_input
, &stage
, sizeof(gl_shader_stage
), true);
2356 VkPipelineShaderStageCreateInfo
*pStage
= calloc(1, sizeof(VkPipelineShaderStageCreateInfo
));
2358 /* Read entry point name */
2360 sc_read
&= radv_sc_read(fd_secure_input
, &name_size
, sizeof(size_t), true);
2362 goto secure_compile_exit
;
2364 char *ep_name
= malloc(name_size
);
2365 sc_read
&= radv_sc_read(fd_secure_input
, ep_name
, name_size
, true);
2366 pStage
->pName
= ep_name
;
2368 /* Read shader module */
2370 sc_read
&= radv_sc_read(fd_secure_input
, &module_size
, sizeof(size_t), true);
2372 goto secure_compile_exit
;
2374 struct radv_shader_module
*module
= malloc(module_size
);
2375 sc_read
&= radv_sc_read(fd_secure_input
, module
, module_size
, true);
2376 pStage
->module
= radv_shader_module_to_handle(module
);
2378 /* Read specialization info */
2380 sc_read
&= radv_sc_read(fd_secure_input
, &has_spec_info
, sizeof(bool), true);
2382 goto secure_compile_exit
;
2384 if (has_spec_info
) {
2385 VkSpecializationInfo
*specInfo
= malloc(sizeof(VkSpecializationInfo
));
2386 pStage
->pSpecializationInfo
= specInfo
;
2388 sc_read
&= radv_sc_read(fd_secure_input
, &specInfo
->dataSize
, sizeof(size_t), true);
2390 goto secure_compile_exit
;
2392 void *si_data
= malloc(specInfo
->dataSize
);
2393 sc_read
&= radv_sc_read(fd_secure_input
, si_data
, specInfo
->dataSize
, true);
2394 specInfo
->pData
= si_data
;
2396 sc_read
&= radv_sc_read(fd_secure_input
, &specInfo
->mapEntryCount
, sizeof(uint32_t), true);
2398 goto secure_compile_exit
;
2400 VkSpecializationMapEntry
*mapEntries
= malloc(sizeof(VkSpecializationMapEntry
) * specInfo
->mapEntryCount
);
2401 for (uint32_t j
= 0; j
< specInfo
->mapEntryCount
; j
++) {
2402 sc_read
&= radv_sc_read(fd_secure_input
, &mapEntries
[j
], sizeof(VkSpecializationMapEntry
), true);
2404 goto secure_compile_exit
;
2407 specInfo
->pMapEntries
= mapEntries
;
2410 pStages
[stage
] = pStage
;
2413 /* Compile the shaders */
2414 VkPipelineCreationFeedbackEXT
*stage_feedbacks
[MESA_SHADER_STAGES
] = { 0 };
2415 radv_create_shaders(pipeline
, device
, NULL
, &key
, pStages
, flags
, NULL
, stage_feedbacks
);
2417 /* free memory allocated above */
2418 for (uint32_t set
= 0; set
< layout
.num_sets
; set
++)
2419 free(layout
.set
[set
].layout
);
2421 for (uint32_t i
= 0; i
< MESA_SHADER_STAGES
; i
++) {
2425 free((void *) pStages
[i
]->pName
);
2426 free(radv_shader_module_from_handle(pStages
[i
]->module
));
2427 if (pStages
[i
]->pSpecializationInfo
) {
2428 free((void *) pStages
[i
]->pSpecializationInfo
->pData
);
2429 free((void *) pStages
[i
]->pSpecializationInfo
->pMapEntries
);
2430 free((void *) pStages
[i
]->pSpecializationInfo
);
2432 free((void *) pStages
[i
]);
2435 vk_free(&device
->alloc
, pipeline
);
2437 sc_type
= RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED
;
2438 write(fd_secure_output
, &sc_type
, sizeof(sc_type
));
2440 } else if (sc_type
== RADV_SC_TYPE_DESTROY_DEVICE
) {
2441 goto secure_compile_exit
;
2445 secure_compile_exit
:
2446 close(fd_secure_input
);
2447 close(fd_secure_output
);
2448 close(fd_idle_device_output
);
2452 static enum radv_secure_compile_type
fork_secure_compile_device(struct radv_device
*device
, unsigned process
)
2454 int fd_secure_input
[2];
2455 int fd_secure_output
[2];
2457 /* create pipe descriptors (used to communicate between processes) */
2458 if (pipe(fd_secure_input
) == -1 || pipe(fd_secure_output
) == -1)
2459 return RADV_SC_TYPE_INIT_FAILURE
;
2463 if ((sc_pid
= fork()) == 0) {
2464 device
->sc_state
->secure_compile_thread_counter
= process
;
2465 run_secure_compile_device(device
, process
, fd_secure_output
[1]);
2468 return RADV_SC_TYPE_INIT_FAILURE
;
2470 /* Read the init result returned from the secure process */
2471 enum radv_secure_compile_type sc_type
;
2472 bool sc_read
= radv_sc_read(fd_secure_output
[0], &sc_type
, sizeof(sc_type
), true);
2474 if (sc_type
== RADV_SC_TYPE_INIT_FAILURE
|| !sc_read
) {
2475 close(fd_secure_input
[0]);
2476 close(fd_secure_input
[1]);
2477 close(fd_secure_output
[1]);
2478 close(fd_secure_output
[0]);
2480 waitpid(sc_pid
, &status
, 0);
2482 return RADV_SC_TYPE_INIT_FAILURE
;
2484 assert(sc_type
== RADV_SC_TYPE_INIT_SUCCESS
);
2485 write(device
->sc_state
->secure_compile_processes
[process
].fd_secure_output
, &sc_type
, sizeof(sc_type
));
2487 close(fd_secure_input
[0]);
2488 close(fd_secure_input
[1]);
2489 close(fd_secure_output
[1]);
2490 close(fd_secure_output
[0]);
2493 waitpid(sc_pid
, &status
, 0);
2497 return RADV_SC_TYPE_INIT_SUCCESS
;
2500 /* Run a bare bones fork of a device that was forked right after its creation.
2501 * This device will have low overhead when it is forked again before each
2502 * pipeline compilation. This device sits idle and its only job is to fork
2505 static void run_secure_compile_idle_device(struct radv_device
*device
, unsigned process
,
2506 int fd_secure_input
, int fd_secure_output
)
2508 enum radv_secure_compile_type sc_type
= RADV_SC_TYPE_INIT_SUCCESS
;
2509 device
->sc_state
->secure_compile_processes
[process
].fd_secure_input
= fd_secure_input
;
2510 device
->sc_state
->secure_compile_processes
[process
].fd_secure_output
= fd_secure_output
;
2512 write(fd_secure_output
, &sc_type
, sizeof(sc_type
));
2515 radv_sc_read(fd_secure_input
, &sc_type
, sizeof(sc_type
), false);
2517 if (sc_type
== RADV_SC_TYPE_FORK_DEVICE
) {
2518 sc_type
= fork_secure_compile_device(device
, process
);
2520 if (sc_type
== RADV_SC_TYPE_INIT_FAILURE
)
2521 goto secure_compile_exit
;
2523 } else if (sc_type
== RADV_SC_TYPE_DESTROY_DEVICE
) {
2524 goto secure_compile_exit
;
2528 secure_compile_exit
:
2529 close(fd_secure_input
);
2530 close(fd_secure_output
);
2534 static void destroy_secure_compile_device(struct radv_device
*device
, unsigned process
)
2536 int fd_secure_input
= device
->sc_state
->secure_compile_processes
[process
].fd_secure_input
;
2538 enum radv_secure_compile_type sc_type
= RADV_SC_TYPE_DESTROY_DEVICE
;
2539 write(fd_secure_input
, &sc_type
, sizeof(sc_type
));
2541 close(device
->sc_state
->secure_compile_processes
[process
].fd_secure_input
);
2542 close(device
->sc_state
->secure_compile_processes
[process
].fd_secure_output
);
2545 waitpid(device
->sc_state
->secure_compile_processes
[process
].sc_pid
, &status
, 0);
2548 static VkResult
fork_secure_compile_idle_device(struct radv_device
*device
)
2550 device
->sc_state
= vk_zalloc(&device
->alloc
,
2551 sizeof(struct radv_secure_compile_state
),
2552 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
);
2554 mtx_init(&device
->sc_state
->secure_compile_mutex
, mtx_plain
);
2556 pid_t upid
= getpid();
2557 time_t seconds
= time(NULL
);
2560 if (asprintf(&uid
, "%ld_%ld", (long) upid
, (long) seconds
) == -1)
2561 return VK_ERROR_INITIALIZATION_FAILED
;
2563 device
->sc_state
->uid
= uid
;
2565 uint8_t sc_threads
= device
->instance
->num_sc_threads
;
2566 int fd_secure_input
[MAX_SC_PROCS
][2];
2567 int fd_secure_output
[MAX_SC_PROCS
][2];
2569 /* create pipe descriptors (used to communicate between processes) */
2570 for (unsigned i
= 0; i
< sc_threads
; i
++) {
2571 if (pipe(fd_secure_input
[i
]) == -1 ||
2572 pipe(fd_secure_output
[i
]) == -1) {
2573 return VK_ERROR_INITIALIZATION_FAILED
;
2577 device
->sc_state
->secure_compile_processes
= vk_zalloc(&device
->alloc
,
2578 sizeof(struct radv_secure_compile_process
) * sc_threads
, 8,
2579 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
);
2581 for (unsigned process
= 0; process
< sc_threads
; process
++) {
2582 if ((device
->sc_state
->secure_compile_processes
[process
].sc_pid
= fork()) == 0) {
2583 device
->sc_state
->secure_compile_thread_counter
= process
;
2584 run_secure_compile_idle_device(device
, process
, fd_secure_input
[process
][0], fd_secure_output
[process
][1]);
2586 if (device
->sc_state
->secure_compile_processes
[process
].sc_pid
== -1)
2587 return VK_ERROR_INITIALIZATION_FAILED
;
2589 /* Read the init result returned from the secure process */
2590 enum radv_secure_compile_type sc_type
;
2591 bool sc_read
= radv_sc_read(fd_secure_output
[process
][0], &sc_type
, sizeof(sc_type
), true);
2594 if (sc_read
&& sc_type
== RADV_SC_TYPE_INIT_SUCCESS
) {
2595 fifo_result
= secure_compile_open_fifo_fds(device
->sc_state
,
2596 &device
->sc_state
->secure_compile_processes
[process
].fd_server
,
2597 &device
->sc_state
->secure_compile_processes
[process
].fd_client
,
2600 device
->sc_state
->secure_compile_processes
[process
].fd_secure_input
= fd_secure_input
[process
][1];
2601 device
->sc_state
->secure_compile_processes
[process
].fd_secure_output
= fd_secure_output
[process
][0];
2604 if (sc_type
== RADV_SC_TYPE_INIT_FAILURE
|| !sc_read
|| !fifo_result
) {
2605 close(fd_secure_input
[process
][0]);
2606 close(fd_secure_input
[process
][1]);
2607 close(fd_secure_output
[process
][1]);
2608 close(fd_secure_output
[process
][0]);
2610 waitpid(device
->sc_state
->secure_compile_processes
[process
].sc_pid
, &status
, 0);
2612 /* Destroy any forks that were created sucessfully */
2613 for (unsigned i
= 0; i
< process
; i
++) {
2614 destroy_secure_compile_device(device
, i
);
2617 return VK_ERROR_INITIALIZATION_FAILED
;
2625 radv_create_pthread_cond(pthread_cond_t
*cond
)
2627 pthread_condattr_t condattr
;
2628 if (pthread_condattr_init(&condattr
)) {
2629 return VK_ERROR_INITIALIZATION_FAILED
;
2632 if (pthread_condattr_setclock(&condattr
, CLOCK_MONOTONIC
)) {
2633 pthread_condattr_destroy(&condattr
);
2634 return VK_ERROR_INITIALIZATION_FAILED
;
2636 if (pthread_cond_init(cond
, &condattr
)) {
2637 pthread_condattr_destroy(&condattr
);
2638 return VK_ERROR_INITIALIZATION_FAILED
;
2640 pthread_condattr_destroy(&condattr
);
2644 VkResult
radv_CreateDevice(
2645 VkPhysicalDevice physicalDevice
,
2646 const VkDeviceCreateInfo
* pCreateInfo
,
2647 const VkAllocationCallbacks
* pAllocator
,
2650 RADV_FROM_HANDLE(radv_physical_device
, physical_device
, physicalDevice
);
2652 struct radv_device
*device
;
2654 bool keep_shader_info
= false;
2656 /* Check enabled features */
2657 if (pCreateInfo
->pEnabledFeatures
) {
2658 VkPhysicalDeviceFeatures supported_features
;
2659 radv_GetPhysicalDeviceFeatures(physicalDevice
, &supported_features
);
2660 VkBool32
*supported_feature
= (VkBool32
*)&supported_features
;
2661 VkBool32
*enabled_feature
= (VkBool32
*)pCreateInfo
->pEnabledFeatures
;
2662 unsigned num_features
= sizeof(VkPhysicalDeviceFeatures
) / sizeof(VkBool32
);
2663 for (uint32_t i
= 0; i
< num_features
; i
++) {
2664 if (enabled_feature
[i
] && !supported_feature
[i
])
2665 return vk_error(physical_device
->instance
, VK_ERROR_FEATURE_NOT_PRESENT
);
2669 device
= vk_zalloc2(&physical_device
->instance
->alloc
, pAllocator
,
2671 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
);
2673 return vk_error(physical_device
->instance
, VK_ERROR_OUT_OF_HOST_MEMORY
);
2675 device
->_loader_data
.loaderMagic
= ICD_LOADER_MAGIC
;
2676 device
->instance
= physical_device
->instance
;
2677 device
->physical_device
= physical_device
;
2679 device
->ws
= physical_device
->ws
;
2681 device
->alloc
= *pAllocator
;
2683 device
->alloc
= physical_device
->instance
->alloc
;
2685 for (uint32_t i
= 0; i
< pCreateInfo
->enabledExtensionCount
; i
++) {
2686 const char *ext_name
= pCreateInfo
->ppEnabledExtensionNames
[i
];
2687 int index
= radv_get_device_extension_index(ext_name
);
2688 if (index
< 0 || !physical_device
->supported_extensions
.extensions
[index
]) {
2689 vk_free(&device
->alloc
, device
);
2690 return vk_error(physical_device
->instance
, VK_ERROR_EXTENSION_NOT_PRESENT
);
2693 device
->enabled_extensions
.extensions
[index
] = true;
2696 keep_shader_info
= device
->enabled_extensions
.AMD_shader_info
;
2698 /* With update after bind we can't attach bo's to the command buffer
2699 * from the descriptor set anymore, so we have to use a global BO list.
2701 device
->use_global_bo_list
=
2702 (device
->instance
->perftest_flags
& RADV_PERFTEST_BO_LIST
) ||
2703 device
->enabled_extensions
.EXT_descriptor_indexing
||
2704 device
->enabled_extensions
.EXT_buffer_device_address
||
2705 device
->enabled_extensions
.KHR_buffer_device_address
;
2707 device
->robust_buffer_access
= pCreateInfo
->pEnabledFeatures
&&
2708 pCreateInfo
->pEnabledFeatures
->robustBufferAccess
;
2710 mtx_init(&device
->shader_slab_mutex
, mtx_plain
);
2711 list_inithead(&device
->shader_slabs
);
2713 radv_bo_list_init(&device
->bo_list
);
2715 for (unsigned i
= 0; i
< pCreateInfo
->queueCreateInfoCount
; i
++) {
2716 const VkDeviceQueueCreateInfo
*queue_create
= &pCreateInfo
->pQueueCreateInfos
[i
];
2717 uint32_t qfi
= queue_create
->queueFamilyIndex
;
2718 const VkDeviceQueueGlobalPriorityCreateInfoEXT
*global_priority
=
2719 vk_find_struct_const(queue_create
->pNext
, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT
);
2721 assert(!global_priority
|| device
->physical_device
->rad_info
.has_ctx_priority
);
2723 device
->queues
[qfi
] = vk_alloc(&device
->alloc
,
2724 queue_create
->queueCount
* sizeof(struct radv_queue
), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
);
2725 if (!device
->queues
[qfi
]) {
2726 result
= VK_ERROR_OUT_OF_HOST_MEMORY
;
2730 memset(device
->queues
[qfi
], 0, queue_create
->queueCount
* sizeof(struct radv_queue
));
2732 device
->queue_count
[qfi
] = queue_create
->queueCount
;
2734 for (unsigned q
= 0; q
< queue_create
->queueCount
; q
++) {
2735 result
= radv_queue_init(device
, &device
->queues
[qfi
][q
],
2736 qfi
, q
, queue_create
->flags
,
2738 if (result
!= VK_SUCCESS
)
2743 device
->pbb_allowed
= device
->physical_device
->rad_info
.chip_class
>= GFX9
&&
2744 !(device
->instance
->debug_flags
& RADV_DEBUG_NOBINNING
);
2746 /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
2747 device
->dfsm_allowed
= device
->pbb_allowed
&&
2748 (device
->instance
->perftest_flags
& RADV_PERFTEST_DFSM
);
2750 device
->always_use_syncobj
= device
->physical_device
->rad_info
.has_syncobj_wait_for_submit
;
2752 /* The maximum number of scratch waves. Scratch space isn't divided
2753 * evenly between CUs. The number is only a function of the number of CUs.
2754 * We can decrease the constant to decrease the scratch buffer size.
2756 * sctx->scratch_waves must be >= the maximum possible size of
2757 * 1 threadgroup, so that the hw doesn't hang from being unable
2760 * The recommended value is 4 per CU at most. Higher numbers don't
2761 * bring much benefit, but they still occupy chip resources (think
2762 * async compute). I've seen ~2% performance difference between 4 and 32.
2764 uint32_t max_threads_per_block
= 2048;
2765 device
->scratch_waves
= MAX2(32 * physical_device
->rad_info
.num_good_compute_units
,
2766 max_threads_per_block
/ 64);
2768 device
->dispatch_initiator
= S_00B800_COMPUTE_SHADER_EN(1);
2770 if (device
->physical_device
->rad_info
.chip_class
>= GFX7
) {
2771 /* If the KMD allows it (there is a KMD hw register for it),
2772 * allow launching waves out-of-order.
2774 device
->dispatch_initiator
|= S_00B800_ORDER_MODE(1);
2777 radv_device_init_gs_info(device
);
2779 device
->tess_offchip_block_dw_size
=
2780 device
->physical_device
->rad_info
.family
== CHIP_HAWAII
? 4096 : 8192;
2782 if (getenv("RADV_TRACE_FILE")) {
2783 const char *filename
= getenv("RADV_TRACE_FILE");
2785 keep_shader_info
= true;
2787 if (!radv_init_trace(device
))
2790 fprintf(stderr
, "*****************************************************************************\n");
2791 fprintf(stderr
, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
2792 fprintf(stderr
, "*****************************************************************************\n");
2794 fprintf(stderr
, "Trace file will be dumped to %s\n", filename
);
2795 radv_dump_enabled_options(device
, stderr
);
2798 /* Temporarily disable secure compile while we create meta shaders, etc */
2799 uint8_t sc_threads
= device
->instance
->num_sc_threads
;
2801 device
->instance
->num_sc_threads
= 0;
2803 device
->keep_shader_info
= keep_shader_info
;
2804 result
= radv_device_init_meta(device
);
2805 if (result
!= VK_SUCCESS
)
2808 radv_device_init_msaa(device
);
2810 for (int family
= 0; family
< RADV_MAX_QUEUE_FAMILIES
; ++family
) {
2811 device
->empty_cs
[family
] = device
->ws
->cs_create(device
->ws
, family
);
2813 case RADV_QUEUE_GENERAL
:
2814 radeon_emit(device
->empty_cs
[family
], PKT3(PKT3_CONTEXT_CONTROL
, 1, 0));
2815 radeon_emit(device
->empty_cs
[family
], CONTEXT_CONTROL_LOAD_ENABLE(1));
2816 radeon_emit(device
->empty_cs
[family
], CONTEXT_CONTROL_SHADOW_ENABLE(1));
2818 case RADV_QUEUE_COMPUTE
:
2819 radeon_emit(device
->empty_cs
[family
], PKT3(PKT3_NOP
, 0, 0));
2820 radeon_emit(device
->empty_cs
[family
], 0);
2823 device
->ws
->cs_finalize(device
->empty_cs
[family
]);
2826 if (device
->physical_device
->rad_info
.chip_class
>= GFX7
)
2827 cik_create_gfx_config(device
);
2829 VkPipelineCacheCreateInfo ci
;
2830 ci
.sType
= VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO
;
2833 ci
.pInitialData
= NULL
;
2834 ci
.initialDataSize
= 0;
2836 result
= radv_CreatePipelineCache(radv_device_to_handle(device
),
2838 if (result
!= VK_SUCCESS
)
2841 device
->mem_cache
= radv_pipeline_cache_from_handle(pc
);
2843 result
= radv_create_pthread_cond(&device
->timeline_cond
);
2844 if (result
!= VK_SUCCESS
)
2845 goto fail_mem_cache
;
2847 device
->force_aniso
=
2848 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
2849 if (device
->force_aniso
>= 0) {
2850 fprintf(stderr
, "radv: Forcing anisotropy filter to %ix\n",
2851 1 << util_logbase2(device
->force_aniso
));
2854 /* Fork device for secure compile as required */
2855 device
->instance
->num_sc_threads
= sc_threads
;
2856 if (radv_device_use_secure_compile(device
->instance
)) {
2858 result
= fork_secure_compile_idle_device(device
);
2859 if (result
!= VK_SUCCESS
)
2863 *pDevice
= radv_device_to_handle(device
);
2867 radv_DestroyPipelineCache(radv_device_to_handle(device
), pc
, NULL
);
2869 radv_device_finish_meta(device
);
2871 radv_bo_list_finish(&device
->bo_list
);
2873 if (device
->trace_bo
)
2874 device
->ws
->buffer_destroy(device
->trace_bo
);
2876 if (device
->gfx_init
)
2877 device
->ws
->buffer_destroy(device
->gfx_init
);
2879 for (unsigned i
= 0; i
< RADV_MAX_QUEUE_FAMILIES
; i
++) {
2880 for (unsigned q
= 0; q
< device
->queue_count
[i
]; q
++)
2881 radv_queue_finish(&device
->queues
[i
][q
]);
2882 if (device
->queue_count
[i
])
2883 vk_free(&device
->alloc
, device
->queues
[i
]);
2886 vk_free(&device
->alloc
, device
);
2890 void radv_DestroyDevice(
2892 const VkAllocationCallbacks
* pAllocator
)
2894 RADV_FROM_HANDLE(radv_device
, device
, _device
);
2899 if (device
->trace_bo
)
2900 device
->ws
->buffer_destroy(device
->trace_bo
);
2902 if (device
->gfx_init
)
2903 device
->ws
->buffer_destroy(device
->gfx_init
);
2905 for (unsigned i
= 0; i
< RADV_MAX_QUEUE_FAMILIES
; i
++) {
2906 for (unsigned q
= 0; q
< device
->queue_count
[i
]; q
++)
2907 radv_queue_finish(&device
->queues
[i
][q
]);
2908 if (device
->queue_count
[i
])
2909 vk_free(&device
->alloc
, device
->queues
[i
]);
2910 if (device
->empty_cs
[i
])
2911 device
->ws
->cs_destroy(device
->empty_cs
[i
]);
2913 radv_device_finish_meta(device
);
2915 VkPipelineCache pc
= radv_pipeline_cache_to_handle(device
->mem_cache
);
2916 radv_DestroyPipelineCache(radv_device_to_handle(device
), pc
, NULL
);
2918 radv_destroy_shader_slabs(device
);
2920 pthread_cond_destroy(&device
->timeline_cond
);
2921 radv_bo_list_finish(&device
->bo_list
);
2922 if (radv_device_use_secure_compile(device
->instance
)) {
2923 for (unsigned i
= 0; i
< device
->instance
->num_sc_threads
; i
++ ) {
2924 destroy_secure_compile_device(device
, i
);
2928 if (device
->sc_state
) {
2929 free(device
->sc_state
->uid
);
2930 vk_free(&device
->alloc
, device
->sc_state
->secure_compile_processes
);
2932 vk_free(&device
->alloc
, device
->sc_state
);
2933 vk_free(&device
->alloc
, device
);
2936 VkResult
radv_EnumerateInstanceLayerProperties(
2937 uint32_t* pPropertyCount
,
2938 VkLayerProperties
* pProperties
)
2940 if (pProperties
== NULL
) {
2941 *pPropertyCount
= 0;
2945 /* None supported at this time */
2946 return vk_error(NULL
, VK_ERROR_LAYER_NOT_PRESENT
);
2949 VkResult
radv_EnumerateDeviceLayerProperties(
2950 VkPhysicalDevice physicalDevice
,
2951 uint32_t* pPropertyCount
,
2952 VkLayerProperties
* pProperties
)
2954 if (pProperties
== NULL
) {
2955 *pPropertyCount
= 0;
2959 /* None supported at this time */
2960 return vk_error(NULL
, VK_ERROR_LAYER_NOT_PRESENT
);
2963 void radv_GetDeviceQueue2(
2965 const VkDeviceQueueInfo2
* pQueueInfo
,
2968 RADV_FROM_HANDLE(radv_device
, device
, _device
);
2969 struct radv_queue
*queue
;
2971 queue
= &device
->queues
[pQueueInfo
->queueFamilyIndex
][pQueueInfo
->queueIndex
];
2972 if (pQueueInfo
->flags
!= queue
->flags
) {
2973 /* From the Vulkan 1.1.70 spec:
2975 * "The queue returned by vkGetDeviceQueue2 must have the same
2976 * flags value from this structure as that used at device
2977 * creation time in a VkDeviceQueueCreateInfo instance. If no
2978 * matching flags were specified at device creation time then
2979 * pQueue will return VK_NULL_HANDLE."
2981 *pQueue
= VK_NULL_HANDLE
;
2985 *pQueue
= radv_queue_to_handle(queue
);
2988 void radv_GetDeviceQueue(
2990 uint32_t queueFamilyIndex
,
2991 uint32_t queueIndex
,
2994 const VkDeviceQueueInfo2 info
= (VkDeviceQueueInfo2
) {
2995 .sType
= VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2
,
2996 .queueFamilyIndex
= queueFamilyIndex
,
2997 .queueIndex
= queueIndex
3000 radv_GetDeviceQueue2(_device
, &info
, pQueue
);
3004 fill_geom_tess_rings(struct radv_queue
*queue
,
3006 bool add_sample_positions
,
3007 uint32_t esgs_ring_size
,
3008 struct radeon_winsys_bo
*esgs_ring_bo
,
3009 uint32_t gsvs_ring_size
,
3010 struct radeon_winsys_bo
*gsvs_ring_bo
,
3011 uint32_t tess_factor_ring_size
,
3012 uint32_t tess_offchip_ring_offset
,
3013 uint32_t tess_offchip_ring_size
,
3014 struct radeon_winsys_bo
*tess_rings_bo
)
3016 uint32_t *desc
= &map
[4];
3019 uint64_t esgs_va
= radv_buffer_get_va(esgs_ring_bo
);
3021 /* stride 0, num records - size, add tid, swizzle, elsize4,
3024 desc
[1] = S_008F04_BASE_ADDRESS_HI(esgs_va
>> 32) |
3025 S_008F04_SWIZZLE_ENABLE(true);
3026 desc
[2] = esgs_ring_size
;
3027 desc
[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
3028 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
3029 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
3030 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
) |
3031 S_008F0C_INDEX_STRIDE(3) |
3032 S_008F0C_ADD_TID_ENABLE(1);
3034 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3035 desc
[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
3036 S_008F0C_OOB_SELECT(2) |
3037 S_008F0C_RESOURCE_LEVEL(1);
3039 desc
[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
3040 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
) |
3041 S_008F0C_ELEMENT_SIZE(1);
3044 /* GS entry for ES->GS ring */
3045 /* stride 0, num records - size, elsize0,
3048 desc
[5] = S_008F04_BASE_ADDRESS_HI(esgs_va
>> 32);
3049 desc
[6] = esgs_ring_size
;
3050 desc
[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
3051 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
3052 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
3053 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
3055 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3056 desc
[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
3057 S_008F0C_OOB_SELECT(2) |
3058 S_008F0C_RESOURCE_LEVEL(1);
3060 desc
[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
3061 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
3068 uint64_t gsvs_va
= radv_buffer_get_va(gsvs_ring_bo
);
3070 /* VS entry for GS->VS ring */
3071 /* stride 0, num records - size, elsize0,
3074 desc
[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va
>> 32);
3075 desc
[2] = gsvs_ring_size
;
3076 desc
[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
3077 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
3078 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
3079 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
3081 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3082 desc
[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
3083 S_008F0C_OOB_SELECT(2) |
3084 S_008F0C_RESOURCE_LEVEL(1);
3086 desc
[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
3087 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
3090 /* stride gsvs_itemsize, num records 64
3091 elsize 4, index stride 16 */
3092 /* shader will patch stride and desc[2] */
3094 desc
[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va
>> 32) |
3095 S_008F04_SWIZZLE_ENABLE(1);
3097 desc
[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
3098 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
3099 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
3100 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
) |
3101 S_008F0C_INDEX_STRIDE(1) |
3102 S_008F0C_ADD_TID_ENABLE(true);
3104 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3105 desc
[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
3106 S_008F0C_OOB_SELECT(2) |
3107 S_008F0C_RESOURCE_LEVEL(1);
3109 desc
[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
3110 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
) |
3111 S_008F0C_ELEMENT_SIZE(1);
3118 if (tess_rings_bo
) {
3119 uint64_t tess_va
= radv_buffer_get_va(tess_rings_bo
);
3120 uint64_t tess_offchip_va
= tess_va
+ tess_offchip_ring_offset
;
3123 desc
[1] = S_008F04_BASE_ADDRESS_HI(tess_va
>> 32);
3124 desc
[2] = tess_factor_ring_size
;
3125 desc
[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
3126 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
3127 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
3128 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
3130 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3131 desc
[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
3132 S_008F0C_OOB_SELECT(3) |
3133 S_008F0C_RESOURCE_LEVEL(1);
3135 desc
[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
3136 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
3139 desc
[4] = tess_offchip_va
;
3140 desc
[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va
>> 32);
3141 desc
[6] = tess_offchip_ring_size
;
3142 desc
[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X
) |
3143 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y
) |
3144 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z
) |
3145 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W
);
3147 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3148 desc
[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT
) |
3149 S_008F0C_OOB_SELECT(3) |
3150 S_008F0C_RESOURCE_LEVEL(1);
3152 desc
[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT
) |
3153 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32
);
3159 if (add_sample_positions
) {
3160 /* add sample positions after all rings */
3161 memcpy(desc
, queue
->device
->sample_locations_1x
, 8);
3163 memcpy(desc
, queue
->device
->sample_locations_2x
, 16);
3165 memcpy(desc
, queue
->device
->sample_locations_4x
, 32);
3167 memcpy(desc
, queue
->device
->sample_locations_8x
, 64);
3172 radv_get_hs_offchip_param(struct radv_device
*device
, uint32_t *max_offchip_buffers_p
)
3174 bool double_offchip_buffers
= device
->physical_device
->rad_info
.chip_class
>= GFX7
&&
3175 device
->physical_device
->rad_info
.family
!= CHIP_CARRIZO
&&
3176 device
->physical_device
->rad_info
.family
!= CHIP_STONEY
;
3177 unsigned max_offchip_buffers_per_se
= double_offchip_buffers
? 128 : 64;
3178 unsigned max_offchip_buffers
;
3179 unsigned offchip_granularity
;
3180 unsigned hs_offchip_param
;
3184 * This must be one less than the maximum number due to a hw limitation.
3185 * Various hardware bugs need thGFX7
3188 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
3189 * Gfx7 should limit max_offchip_buffers to 508
3190 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
3192 * Follow AMDVLK here.
3194 if (device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3195 max_offchip_buffers_per_se
= 256;
3196 } else if (device
->physical_device
->rad_info
.family
== CHIP_VEGA10
||
3197 device
->physical_device
->rad_info
.chip_class
== GFX7
||
3198 device
->physical_device
->rad_info
.chip_class
== GFX6
)
3199 --max_offchip_buffers_per_se
;
3201 max_offchip_buffers
= max_offchip_buffers_per_se
*
3202 device
->physical_device
->rad_info
.max_se
;
3204 /* Hawaii has a bug with offchip buffers > 256 that can be worked
3205 * around by setting 4K granularity.
3207 if (device
->tess_offchip_block_dw_size
== 4096) {
3208 assert(device
->physical_device
->rad_info
.family
== CHIP_HAWAII
);
3209 offchip_granularity
= V_03093C_X_4K_DWORDS
;
3211 assert(device
->tess_offchip_block_dw_size
== 8192);
3212 offchip_granularity
= V_03093C_X_8K_DWORDS
;
3215 switch (device
->physical_device
->rad_info
.chip_class
) {
3217 max_offchip_buffers
= MIN2(max_offchip_buffers
, 126);
3222 max_offchip_buffers
= MIN2(max_offchip_buffers
, 508);
3230 *max_offchip_buffers_p
= max_offchip_buffers
;
3231 if (device
->physical_device
->rad_info
.chip_class
>= GFX7
) {
3232 if (device
->physical_device
->rad_info
.chip_class
>= GFX8
)
3233 --max_offchip_buffers
;
3235 S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers
) |
3236 S_03093C_OFFCHIP_GRANULARITY(offchip_granularity
);
3239 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers
);
3241 return hs_offchip_param
;
3245 radv_emit_gs_ring_sizes(struct radv_queue
*queue
, struct radeon_cmdbuf
*cs
,
3246 struct radeon_winsys_bo
*esgs_ring_bo
,
3247 uint32_t esgs_ring_size
,
3248 struct radeon_winsys_bo
*gsvs_ring_bo
,
3249 uint32_t gsvs_ring_size
)
3251 if (!esgs_ring_bo
&& !gsvs_ring_bo
)
3255 radv_cs_add_buffer(queue
->device
->ws
, cs
, esgs_ring_bo
);
3258 radv_cs_add_buffer(queue
->device
->ws
, cs
, gsvs_ring_bo
);
3260 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX7
) {
3261 radeon_set_uconfig_reg_seq(cs
, R_030900_VGT_ESGS_RING_SIZE
, 2);
3262 radeon_emit(cs
, esgs_ring_size
>> 8);
3263 radeon_emit(cs
, gsvs_ring_size
>> 8);
3265 radeon_set_config_reg_seq(cs
, R_0088C8_VGT_ESGS_RING_SIZE
, 2);
3266 radeon_emit(cs
, esgs_ring_size
>> 8);
3267 radeon_emit(cs
, gsvs_ring_size
>> 8);
3272 radv_emit_tess_factor_ring(struct radv_queue
*queue
, struct radeon_cmdbuf
*cs
,
3273 unsigned hs_offchip_param
, unsigned tf_ring_size
,
3274 struct radeon_winsys_bo
*tess_rings_bo
)
3281 tf_va
= radv_buffer_get_va(tess_rings_bo
);
3283 radv_cs_add_buffer(queue
->device
->ws
, cs
, tess_rings_bo
);
3285 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX7
) {
3286 radeon_set_uconfig_reg(cs
, R_030938_VGT_TF_RING_SIZE
,
3287 S_030938_SIZE(tf_ring_size
/ 4));
3288 radeon_set_uconfig_reg(cs
, R_030940_VGT_TF_MEMORY_BASE
,
3291 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3292 radeon_set_uconfig_reg(cs
, R_030984_VGT_TF_MEMORY_BASE_HI_UMD
,
3293 S_030984_BASE_HI(tf_va
>> 40));
3294 } else if (queue
->device
->physical_device
->rad_info
.chip_class
== GFX9
) {
3295 radeon_set_uconfig_reg(cs
, R_030944_VGT_TF_MEMORY_BASE_HI
,
3296 S_030944_BASE_HI(tf_va
>> 40));
3298 radeon_set_uconfig_reg(cs
, R_03093C_VGT_HS_OFFCHIP_PARAM
,
3301 radeon_set_config_reg(cs
, R_008988_VGT_TF_RING_SIZE
,
3302 S_008988_SIZE(tf_ring_size
/ 4));
3303 radeon_set_config_reg(cs
, R_0089B8_VGT_TF_MEMORY_BASE
,
3305 radeon_set_config_reg(cs
, R_0089B0_VGT_HS_OFFCHIP_PARAM
,
3311 radv_emit_graphics_scratch(struct radv_queue
*queue
, struct radeon_cmdbuf
*cs
,
3312 uint32_t size_per_wave
, uint32_t waves
,
3313 struct radeon_winsys_bo
*scratch_bo
)
3315 if (queue
->queue_family_index
!= RADV_QUEUE_GENERAL
)
3321 radv_cs_add_buffer(queue
->device
->ws
, cs
, scratch_bo
);
3323 radeon_set_context_reg(cs
, R_0286E8_SPI_TMPRING_SIZE
,
3324 S_0286E8_WAVES(waves
) |
3325 S_0286E8_WAVESIZE(round_up_u32(size_per_wave
, 1024)));
3329 radv_emit_compute_scratch(struct radv_queue
*queue
, struct radeon_cmdbuf
*cs
,
3330 uint32_t size_per_wave
, uint32_t waves
,
3331 struct radeon_winsys_bo
*compute_scratch_bo
)
3333 uint64_t scratch_va
;
3335 if (!compute_scratch_bo
)
3338 scratch_va
= radv_buffer_get_va(compute_scratch_bo
);
3340 radv_cs_add_buffer(queue
->device
->ws
, cs
, compute_scratch_bo
);
3342 radeon_set_sh_reg_seq(cs
, R_00B900_COMPUTE_USER_DATA_0
, 2);
3343 radeon_emit(cs
, scratch_va
);
3344 radeon_emit(cs
, S_008F04_BASE_ADDRESS_HI(scratch_va
>> 32) |
3345 S_008F04_SWIZZLE_ENABLE(1));
3347 radeon_set_sh_reg(cs
, R_00B860_COMPUTE_TMPRING_SIZE
,
3348 S_00B860_WAVES(waves
) |
3349 S_00B860_WAVESIZE(round_up_u32(size_per_wave
, 1024)));
3353 radv_emit_global_shader_pointers(struct radv_queue
*queue
,
3354 struct radeon_cmdbuf
*cs
,
3355 struct radeon_winsys_bo
*descriptor_bo
)
3362 va
= radv_buffer_get_va(descriptor_bo
);
3364 radv_cs_add_buffer(queue
->device
->ws
, cs
, descriptor_bo
);
3366 if (queue
->device
->physical_device
->rad_info
.chip_class
>= GFX10
) {
3367 uint32_t regs
[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0
,
3368 R_00B130_SPI_SHADER_USER_DATA_VS_0
,
3369 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS
,
3370 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS
};
3372 for (int i
= 0; i
< ARRAY_SIZE(regs
); ++i
) {
3373 radv_emit_shader_pointer(queue
->device
, cs
, regs
[i
],
3376 } else if (queue
->device
->physical_device
->rad_info
.chip_class
== GFX9
) {
3377 uint32_t regs
[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0
,
3378 R_00B130_SPI_SHADER_USER_DATA_VS_0
,
3379 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS
,
3380 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS
};
3382 for (int i
= 0; i
< ARRAY_SIZE(regs
); ++i
) {
3383 radv_emit_shader_pointer(queue
->device
, cs
, regs
[i
],
3387 uint32_t regs
[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0
,
3388 R_00B130_SPI_SHADER_USER_DATA_VS_0
,
3389 R_00B230_SPI_SHADER_USER_DATA_GS_0
,
3390 R_00B330_SPI_SHADER_USER_DATA_ES_0
,
3391 R_00B430_SPI_SHADER_USER_DATA_HS_0
,
3392 R_00B530_SPI_SHADER_USER_DATA_LS_0
};
3394 for (int i
= 0; i
< ARRAY_SIZE(regs
); ++i
) {
3395 radv_emit_shader_pointer(queue
->device
, cs
, regs
[i
],
3402 radv_init_graphics_state(struct radeon_cmdbuf
*cs
, struct radv_queue
*queue
)
3404 struct radv_device
*device
= queue
->device
;
3406 if (device
->gfx_init
) {
3407 uint64_t va
= radv_buffer_get_va(device
->gfx_init
);
3409 radeon_emit(cs
, PKT3(PKT3_INDIRECT_BUFFER_CIK
, 2, 0));
3410 radeon_emit(cs
, va
);
3411 radeon_emit(cs
, va
>> 32);
3412 radeon_emit(cs
, device
->gfx_init_size_dw
& 0xffff);
3414 radv_cs_add_buffer(device
->ws
, cs
, device
->gfx_init
);
3416 struct radv_physical_device
*physical_device
= device
->physical_device
;
3417 si_emit_graphics(physical_device
, cs
);
3422 radv_init_compute_state(struct radeon_cmdbuf
*cs
, struct radv_queue
*queue
)
3424 struct radv_physical_device
*physical_device
= queue
->device
->physical_device
;
3425 si_emit_compute(physical_device
, cs
);
3429 radv_get_preamble_cs(struct radv_queue
*queue
,
3430 uint32_t scratch_size_per_wave
,
3431 uint32_t scratch_waves
,
3432 uint32_t compute_scratch_size_per_wave
,
3433 uint32_t compute_scratch_waves
,
3434 uint32_t esgs_ring_size
,
3435 uint32_t gsvs_ring_size
,
3436 bool needs_tess_rings
,
3438 bool needs_sample_positions
,
3439 struct radeon_cmdbuf
**initial_full_flush_preamble_cs
,
3440 struct radeon_cmdbuf
**initial_preamble_cs
,
3441 struct radeon_cmdbuf
**continue_preamble_cs
)
3443 struct radeon_winsys_bo
*scratch_bo
= NULL
;
3444 struct radeon_winsys_bo
*descriptor_bo
= NULL
;
3445 struct radeon_winsys_bo
*compute_scratch_bo
= NULL
;
3446 struct radeon_winsys_bo
*esgs_ring_bo
= NULL
;
3447 struct radeon_winsys_bo
*gsvs_ring_bo
= NULL
;
3448 struct radeon_winsys_bo
*tess_rings_bo
= NULL
;
3449 struct radeon_winsys_bo
*gds_bo
= NULL
;
3450 struct radeon_winsys_bo
*gds_oa_bo
= NULL
;
3451 struct radeon_cmdbuf
*dest_cs
[3] = {0};
3452 bool add_tess_rings
= false, add_gds
= false, add_sample_positions
= false;
3453 unsigned tess_factor_ring_size
= 0, tess_offchip_ring_size
= 0;
3454 unsigned max_offchip_buffers
;
3455 unsigned hs_offchip_param
= 0;
3456 unsigned tess_offchip_ring_offset
;
3457 uint32_t ring_bo_flags
= RADEON_FLAG_NO_CPU_ACCESS
| RADEON_FLAG_NO_INTERPROCESS_SHARING
;
3458 if (!queue
->has_tess_rings
) {
3459 if (needs_tess_rings
)
3460 add_tess_rings
= true;
3462 if (!queue
->has_gds
) {
3466 if (!queue
->has_sample_positions
) {
3467 if (needs_sample_positions
)
3468 add_sample_positions
= true;
3470 tess_factor_ring_size
= 32768 * queue
->device
->physical_device
->rad_info
.max_se
;
3471 hs_offchip_param
= radv_get_hs_offchip_param(queue
->device
,
3472 &max_offchip_buffers
);
3473 tess_offchip_ring_offset
= align(tess_factor_ring_size
, 64 * 1024);
3474 tess_offchip_ring_size
= max_offchip_buffers
*
3475 queue
->device
->tess_offchip_block_dw_size
* 4;
3477 scratch_size_per_wave
= MAX2(scratch_size_per_wave
, queue
->scratch_size_per_wave
);
3478 if (scratch_size_per_wave
)