d51a669b383776529116a84f499ea0affb2cab2e
[mesa.git] / src / amd / vulkan / radv_private.h
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #ifndef RADV_PRIVATE_H
29 #define RADV_PRIVATE_H
30
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <stdbool.h>
34 #include <pthread.h>
35 #include <assert.h>
36 #include <stdint.h>
37 #include <string.h>
38 #ifdef HAVE_VALGRIND
39 #include <valgrind.h>
40 #include <memcheck.h>
41 #define VG(x) x
42 #else
43 #define VG(x)
44 #endif
45
46 #include <amdgpu.h>
47 #include "compiler/shader_enums.h"
48 #include "util/macros.h"
49 #include "util/list.h"
50 #include "main/macros.h"
51 #include "vk_alloc.h"
52 #include "vk_debug_report.h"
53
54 #include "radv_radeon_winsys.h"
55 #include "ac_binary.h"
56 #include "ac_nir_to_llvm.h"
57 #include "ac_gpu_info.h"
58 #include "ac_surface.h"
59 #include "radv_descriptor_set.h"
60
61 #include <llvm-c/TargetMachine.h>
62
63 /* Pre-declarations needed for WSI entrypoints */
64 struct wl_surface;
65 struct wl_display;
66 typedef struct xcb_connection_t xcb_connection_t;
67 typedef uint32_t xcb_visualid_t;
68 typedef uint32_t xcb_window_t;
69
70 #include <vulkan/vulkan.h>
71 #include <vulkan/vulkan_intel.h>
72 #include <vulkan/vk_icd.h>
73
74 #include "radv_entrypoints.h"
75
76 #include "wsi_common.h"
77
/* Vendor ID constant for ATI/AMD hardware. */
78 #define ATI_VENDOR_ID 0x1002
79
/* Fixed limits; several of them size the arrays declared later in this header. */
80 #define MAX_VBS 32
81 #define MAX_VERTEX_ATTRIBS 32
82 #define MAX_RTS 8
83 #define MAX_VIEWPORTS 16
84 #define MAX_SCISSORS 16
85 #define MAX_DISCARD_RECTANGLES 4
86 #define MAX_PUSH_CONSTANTS_SIZE 128
87 #define MAX_PUSH_DESCRIPTORS 32
88 #define MAX_DYNAMIC_BUFFERS 16
/* log2 of the sample count limit; per-sample-count arrays use this as a size. */
89 #define MAX_SAMPLES_LOG2 4
90 #define NUM_META_FS_KEYS 13
91 #define RADV_MAX_DRM_DEVICES 8
92 #define MAX_VIEWS 8
93
/* Number of depth/stencil clear pipeline variants kept per sample count. */
94 #define NUM_DEPTH_CLEAR_PIPELINES 3
95
/* Memory heap indices; RADV_MEM_HEAP_COUNT is the number of heaps. */
96 enum radv_mem_heap {
97 RADV_MEM_HEAP_VRAM,
98 RADV_MEM_HEAP_VRAM_CPU_ACCESS,
99 RADV_MEM_HEAP_GTT,
100 RADV_MEM_HEAP_COUNT
101 };
102
/* Memory type indices; RADV_MEM_TYPE_COUNT is the number of types. */
103 enum radv_mem_type {
104 RADV_MEM_TYPE_VRAM,
105 RADV_MEM_TYPE_GTT_WRITE_COMBINE,
106 RADV_MEM_TYPE_VRAM_CPU_ACCESS,
107 RADV_MEM_TYPE_GTT_CACHED,
108 RADV_MEM_TYPE_COUNT
109 };
110
/* Marks a function as printf-like: argument number `a` is the format string
 * and the variadic arguments start at argument number `b`.
 */
111 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
112
/**
 * Round v up to the next multiple of a.
 *
 * a must be a non-zero power of two (enforced by assert).
 */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
	const uint32_t low_bits = a - 1;

	assert(a != 0 && a == (a & -a));
	return (v + low_bits) & ~low_bits;
}
119
/**
 * Round v up to the next multiple of a, where a need not be a power of two.
 *
 * a must be non-zero (the computation divides by it).
 */
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
	const uint32_t units = (v + a - 1) / a;

	return units * a;
}
125
/**
 * 64-bit variant of align_u32: round v up to the next multiple of a.
 *
 * a must be a non-zero power of two (enforced by assert).
 */
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
	const uint64_t low_bits = a - 1;

	assert(a != 0 && a == (a & -a));
	return (v + low_bits) & ~low_bits;
}
132
/**
 * Signed variant of align_u32: round v up to the next multiple of a.
 *
 * a must be a non-zero power of two (enforced by assert).
 */
static inline int32_t
align_i32(int32_t v, int32_t a)
{
	const int32_t keep_mask = ~(a - 1);

	assert(a != 0 && a == (a & -a));
	return (v + a - 1) & keep_mask;
}
139
/**
 * Return true when n is a multiple of a.
 *
 * The alignment a must be a power of 2 (checked by assert).
 */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
	const uintmax_t remainder_bits = n & (a - 1);

	assert(a == (a & -a));
	return !remainder_bits;
}
147
/**
 * Divide v by a, rounding the quotient up.
 *
 * Despite the name, the result is the number of a-sized units needed to hold
 * v, not v rounded to a multiple of a. a must be non-zero.
 */
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
	const uint32_t biased = v + a - 1;

	return biased / a;
}
153
/**
 * 64-bit variant of round_up_u32: divide v by a, rounding the quotient up.
 *
 * a must be non-zero.
 */
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
	const uint64_t biased = v + a - 1;

	return biased / a;
}
159
/**
 * Shrink a dimension n by `levels` halvings.
 *
 * A zero dimension stays zero; any non-zero dimension is clamped so the
 * result never drops below 1.
 */
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
	if (unlikely(n == 0))
		return 0;

	return MAX2(n >> levels, 1);
}
/**
 * Clamp f to the closed range [min, max].
 *
 * min must be strictly less than max (checked by assert). A value that
 * compares neither above max nor below min is returned unchanged.
 */
static inline float
radv_clamp_f(float f, float min, float max)
{
	assert(min < max);

	return (f > max) ? max : ((f < min) ? min : f);
}
180
/**
 * Clear the bits of clear_mask in *inout_mask.
 *
 * Returns true if at least one of those bits was set beforehand; when none
 * was set, *inout_mask is left untouched and false is returned.
 */
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
	const bool any_set = (*inout_mask & clear_mask) != 0;

	if (any_set)
		*inout_mask &= ~clear_mask;

	return any_set;
}
191
/**
 * Iterate over the set bits of a 32-bit value, lowest bit first.
 *
 * On each iteration `b` is assigned the index (0..31) of the current bit.
 * `dword` is evaluated once and copied, so the caller's value is not
 * modified. After the loop finishes `b` holds -1.
 *
 * The clearing mask is built with an unsigned 1 so that visiting bit 31 does
 * not shift into the sign bit of an int, which is undefined behaviour.
 */
#define for_each_bit(b, dword)                          \
	for (uint32_t __dword = (dword);                \
	     (b) = __builtin_ffs(__dword) - 1, __dword; \
	     __dword &= ~(1u << (b)))
196
/**
 * memcpy of `count` elements from src to dest, with a compile-time check
 * that both pointers refer to elements of the same size.
 *
 * The macro arguments are parenthesized inside sizeof so that pointer
 * expressions such as `base + 1` are dereferenced as a whole; without the
 * parentheses `sizeof(*base + 1)` would measure an integer-promoted
 * expression and the size check could pass for mismatched element types.
 * Evaluates to the value returned by memcpy.
 */
#define typed_memcpy(dest, src, count) ({				\
	STATIC_ASSERT(sizeof(*(src)) == sizeof(*(dest)));		\
	memcpy((dest), (src), (count) * sizeof(*(src)));		\
})
201
202 /* Whenever we generate an error, pass it through this function. Useful for
203 * debugging, where we can break on it. Only call at error site, not when
204 * propagating errors. Might be useful to plug in a stack trace here.
205 */
206
/* `file` and `line` identify the error site; the vk_error macro passes NULL
 * for `format`, so implementations must accept a NULL format string.
 */
207 VkResult __vk_errorf(VkResult error, const char *file, int line, const char *format, ...);
208
/*
 * Error-site wrappers. In DEBUG builds they route the error through
 * __vk_errorf together with the source location; otherwise they simply
 * evaluate to the error code.
 *
 * The expansions deliberately carry no trailing semicolon and the plain
 * variant is parenthesized, so both build flavours behave as ordinary
 * expressions (usable in `return`, in un-braced if/else, or as an operand).
 */
#ifdef DEBUG
#define vk_error(error) __vk_errorf(error, __FILE__, __LINE__, NULL)
#define vk_errorf(error, format, ...) __vk_errorf(error, __FILE__, __LINE__, format, ## __VA_ARGS__)
#else
#define vk_error(error) (error)
#define vk_errorf(error, format, ...) (error)
#endif
216
/* Backend of the radv_finishme() macro; `file`/`line` are the call site and
 * `format` is a printf-style format (checked via radv_printflike).
 */
217 void __radv_finishme(const char *file, int line, const char *format, ...)
218 radv_printflike(3, 4);
/* printf-style logging helper and its va_list counterpart. */
219 void radv_loge(const char *format, ...) radv_printflike(1, 2);
220 void radv_loge_v(const char *format, va_list va);
221
222 /**
223 * Print a FINISHME message, including its source location.
224 *
224 * The function-local static flag makes each expansion site report at most
224 * once.
224 */
225 #define radv_finishme(format, ...) \
226 do { \
227 static bool reported = false; \
228 if (!reported) { \
229 __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
230 reported = true; \
231 } \
232 } while (0)
233
234 /* A non-fatal assert. Useful for debugging.
234 * In DEBUG builds a failed condition is printed to stderr with its source
234 * location and execution continues; otherwise the macro expands to nothing,
234 * so the condition is not evaluated at all. */
235 #ifdef DEBUG
236 #define radv_assert(x) ({ \
237 if (unlikely(!(x))) \
238 fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
239 })
240 #else
241 #define radv_assert(x)
242 #endif
243
/* Placeholder body for an unimplemented function that returns a value:
 * reports the enclosing function via radv_finishme and returns `v`.
 */
244 #define stub_return(v) \
245 do { \
246 radv_finishme("stub %s", __func__); \
247 return (v); \
248 } while (0)
249
/* Same as stub_return, for functions returning void. */
250 #define stub() \
251 do { \
252 radv_finishme("stub %s", __func__); \
253 return; \
254 } while (0)
255
/* Look up an entry point by name.
 * NOTE(review): the result for an unknown name is not visible here;
 * presumably NULL — confirm against the definition.
 */
256 void *radv_lookup_entrypoint(const char *name);
257
/* Driver-side state for one physical device: owning instance, winsys handle,
 * hardware info, identification strings/UUIDs, feature and workaround flags,
 * the on-disk shader cache and the exposed memory properties.
 */
258 struct radv_physical_device {
259 VK_LOADER_DATA _loader_data;
260
261 struct radv_instance * instance;
262
263 struct radeon_winsys *ws;
264 struct radeon_info rad_info;
265 char path[20];
266 char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
267 uint8_t driver_uuid[VK_UUID_SIZE];
268 uint8_t device_uuid[VK_UUID_SIZE];
269 uint8_t cache_uuid[VK_UUID_SIZE];
270
271 int local_fd;
272 struct wsi_device wsi_device;
273
274 bool has_rbplus; /* if the RB+ registers exist */
275 bool rbplus_allowed; /* if RB+ is allowed */
276 bool has_clear_state;
277 bool cpdma_prefetch_writes_memory;
278 bool has_scissor_bug;
279
280 /* This is the driver's on-disk cache used as a fallback as opposed to
281 * the pipeline cache defined by apps.
282 */
283 struct disk_cache * disk_cache;
284
285 VkPhysicalDeviceMemoryProperties memory_properties;
/* One entry per radv_mem_type slot.
 * NOTE(review): presumably maps exposed memory type index -> radv_mem_type;
 * confirm against the code that fills memory_properties.
 */
286 enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
287 };
288
/* Instance-level state: allocator, API version, the embedded array of
 * physical devices (at most RADV_MAX_DRM_DEVICES), debug/perftest flag
 * bitmasks and the debug report callbacks.
 */
289 struct radv_instance {
290 VK_LOADER_DATA _loader_data;
291
292 VkAllocationCallbacks alloc;
293
294 uint32_t apiVersion;
/* Number of valid entries in physicalDevices[]. */
295 int physicalDeviceCount;
296 struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES];
297
298 uint64_t debug_flags;
299 uint64_t perftest_flags;
300
301 struct vk_debug_report_instance debug_report_callbacks;
302 };
303
/* Set up / tear down the window-system-integration state of a physical device. */
304 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
305 void radv_finish_wsi(struct radv_physical_device *physical_device);
306
/* Extension and API version queries, keyed by extension name. */
307 bool radv_instance_extension_supported(const char *name);
308 uint32_t radv_physical_device_api_version(struct radv_physical_device *dev);
309 bool radv_physical_device_extension_supported(struct radv_physical_device *dev,
310 const char *name);
311
/* Opaque here; only pointers to cache entries are used in this header. */
312 struct cache_entry;
313
/* Pipeline cache: a hash table of cache_entry pointers plus size bookkeeping,
 * carrying its own mutex and allocator.
 */
314 struct radv_pipeline_cache {
315 struct radv_device * device;
316 pthread_mutex_t mutex;
317
318 uint32_t total_size;
319 uint32_t table_size;
320 uint32_t kernel_count;
321 struct cache_entry ** hash_table;
322 bool modified;
323
324 VkAllocationCallbacks alloc;
325 };
326
/* Pipeline state that is passed to radv_hash_shaders() as the `key`. */
327 struct radv_pipeline_key {
328 uint32_t instance_rate_inputs;
329 unsigned tess_input_vertices;
330 uint32_t col_format;
331 uint32_t is_int8;
332 uint32_t is_int10;
/* Single-bit flags. */
333 uint32_t multisample : 1;
334 uint32_t has_multiview_view_index : 1;
335 };
336
/* Pipeline cache lifecycle: init, finish, and loading from a serialized
 * blob of `size` bytes.
 */
337 void
338 radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
339 struct radv_device *device);
340 void
341 radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
342 void
343 radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
344 const void *data, size_t size);
345
346 struct radv_shader_variant;
347
/* Look up the shader variants stored under hash `sha1`.
 * NOTE(review): presumably returns true on a cache hit and fills
 * `variants` — confirm against the definition.
 */
348 bool
349 radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
350 struct radv_pipeline_cache *cache,
351 const unsigned char *sha1,
352 struct radv_shader_variant **variants);
353
/* Store shader variants, with their code blobs and sizes, under hash `sha1`. */
354 void
355 radv_pipeline_cache_insert_shaders(struct radv_device *device,
356 struct radv_pipeline_cache *cache,
357 const unsigned char *sha1,
358 struct radv_shader_variant **variants,
359 const void *const *codes,
360 const unsigned *code_sizes);
361
/* Depth/stencil blit destination layout class; the COUNT entry sizes the
 * per-layout render pass arrays in radv_meta_state.
 */
362 enum radv_blit_ds_layout {
363 RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
364 RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
365 RADV_BLIT_DS_LAYOUT_COUNT,
366 };
367
368 static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout)
369 {
370 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
371 }
372
373 static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
374 {
375 return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
376 }
377
/* Destination layout class for meta operations; the COUNT entry sizes the
 * per-layout render pass arrays in radv_meta_state.
 */
378 enum radv_meta_dst_layout {
379 RADV_META_DST_LAYOUT_GENERAL,
380 RADV_META_DST_LAYOUT_OPTIMAL,
381 RADV_META_DST_LAYOUT_COUNT,
382 };
383
384 static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout)
385 {
386 return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL;
387 }
388
389 static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
390 {
391 return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
392 }
393
/* Vulkan objects (render passes, pipelines, layouts) owned by the driver for
 * its internal "meta" operations, grouped per operation: clear, blit, blit2d,
 * image<->buffer copies, resolves, decompression passes, buffer fill/copy
 * and query handling.
 */
394 struct radv_meta_state {
395 VkAllocationCallbacks alloc;
396
397 struct radv_pipeline_cache cache;
398
399 /**
400 * Use array element `i` for images with `2^i` samples.
401 */
402 struct {
403 VkRenderPass render_pass[NUM_META_FS_KEYS];
404 VkPipeline color_pipelines[NUM_META_FS_KEYS];
405
406 VkRenderPass depthstencil_rp;
407 VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
408 VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
409 VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
410 } clear[1 + MAX_SAMPLES_LOG2];
411
412 VkPipelineLayout clear_color_p_layout;
413 VkPipelineLayout clear_depth_p_layout;
/* Blit state; color objects are indexed by meta fs key, render passes
 * additionally by destination layout class. */
414 struct {
415 VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
416
417 /** Pipeline that blits from a 1D image. */
418 VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
419
420 /** Pipeline that blits from a 2D image. */
421 VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
422
423 /** Pipeline that blits from a 3D image. */
424 VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
425
426 VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
427 VkPipeline depth_only_1d_pipeline;
428 VkPipeline depth_only_2d_pipeline;
429 VkPipeline depth_only_3d_pipeline;
430
431 VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
432 VkPipeline stencil_only_1d_pipeline;
433 VkPipeline stencil_only_2d_pipeline;
434 VkPipeline stencil_only_3d_pipeline;
435 VkPipelineLayout pipeline_layout;
436 VkDescriptorSetLayout ds_layout;
437 } blit;
438
/* NOTE(review): the arrays of size 3 below are presumably indexed by
 * blit2d source type — confirm against the blit2d implementation. */
439 struct {
440 VkRenderPass render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
441
442 VkPipelineLayout p_layouts[3];
443 VkDescriptorSetLayout ds_layouts[3];
444 VkPipeline pipelines[3][NUM_META_FS_KEYS];
445
446 VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
447 VkPipeline depth_only_pipeline[3];
448
449 VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
450 VkPipeline stencil_only_pipeline[3];
451 } blit2d;
452
/* itob / btoi / itoi / cleari share one shape: a pipeline layout, a
 * descriptor set layout, and a regular plus a 3D pipeline. */
453 struct {
454 VkPipelineLayout img_p_layout;
455 VkDescriptorSetLayout img_ds_layout;
456 VkPipeline pipeline;
457 VkPipeline pipeline_3d;
458 } itob;
459 struct {
460 VkPipelineLayout img_p_layout;
461 VkDescriptorSetLayout img_ds_layout;
462 VkPipeline pipeline;
463 VkPipeline pipeline_3d;
464 } btoi;
465 struct {
466 VkPipelineLayout img_p_layout;
467 VkDescriptorSetLayout img_ds_layout;
468 VkPipeline pipeline;
469 VkPipeline pipeline_3d;
470 } itoi;
471 struct {
472 VkPipelineLayout img_p_layout;
473 VkDescriptorSetLayout img_ds_layout;
474 VkPipeline pipeline;
475 VkPipeline pipeline_3d;
476 } cleari;
477
478 struct {
479 VkPipelineLayout p_layout;
480 VkPipeline pipeline;
481 VkRenderPass pass;
482 } resolve;
483
/* Note: rc[] here and in resolve_fragment has MAX_SAMPLES_LOG2 entries,
 * one fewer than the clear[] and depth_decomp[] arrays. */
484 struct {
485 VkDescriptorSetLayout ds_layout;
486 VkPipelineLayout p_layout;
487 struct {
488 VkPipeline pipeline;
489 VkPipeline i_pipeline;
490 VkPipeline srgb_pipeline;
491 } rc[MAX_SAMPLES_LOG2];
492 } resolve_compute;
493
494 struct {
495 VkDescriptorSetLayout ds_layout;
496 VkPipelineLayout p_layout;
497
498 struct {
499 VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
500 VkPipeline pipeline[NUM_META_FS_KEYS];
501 } rc[MAX_SAMPLES_LOG2];
502 } resolve_fragment;
503
504 struct {
505 VkPipelineLayout p_layout;
506 VkPipeline decompress_pipeline;
507 VkPipeline resummarize_pipeline;
508 VkRenderPass pass;
509 } depth_decomp[1 + MAX_SAMPLES_LOG2];
510
511 struct {
512 VkPipelineLayout p_layout;
513 VkPipeline cmask_eliminate_pipeline;
514 VkPipeline fmask_decompress_pipeline;
515 VkPipeline dcc_decompress_pipeline;
516 VkRenderPass pass;
517
518 VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
519 VkPipelineLayout dcc_decompress_compute_p_layout;
520 VkPipeline dcc_decompress_compute_pipeline;
521 } fast_clear_flush;
522
523 struct {
524 VkPipelineLayout fill_p_layout;
525 VkPipelineLayout copy_p_layout;
526 VkDescriptorSetLayout fill_ds_layout;
527 VkDescriptorSetLayout copy_ds_layout;
528 VkPipeline fill_pipeline;
529 VkPipeline copy_pipeline;
530 } buffer;
531
532 struct {
533 VkDescriptorSetLayout ds_layout;
534 VkPipelineLayout p_layout;
535 VkPipeline occlusion_query_pipeline;
536 VkPipeline pipeline_statistics_query_pipeline;
537 } query;
538 };
539
540 /* queue types (queue family indices) */
541 #define RADV_QUEUE_GENERAL 0
542 #define RADV_QUEUE_COMPUTE 1
543 #define RADV_QUEUE_TRANSFER 2
544
/* Number of queue families above; sizes the per-family arrays in radv_device. */
545 #define RADV_MAX_QUEUE_FAMILIES 3
546
/* Map a queue family index `f` to the winsys ring type. */
547 enum ring_type radv_queue_family_to_ring(int f);
548
/* Per-queue state: hardware context, family/index, the currently allocated
 * scratch and ring buffer sizes with their buffer objects, and the preamble
 * command streams.
 */
549 struct radv_queue {
550 VK_LOADER_DATA _loader_data;
551 struct radv_device * device;
552 struct radeon_winsys_ctx *hw_ctx;
553 enum radeon_ctx_priority priority;
554 uint32_t queue_family_index;
555 int queue_idx;
556
/* Current sizes/flags; the matching "*_needed" fields live in
 * struct radv_cmd_buffer. */
557 uint32_t scratch_size;
558 uint32_t compute_scratch_size;
559 uint32_t esgs_ring_size;
560 uint32_t gsvs_ring_size;
561 bool has_tess_rings;
562 bool has_sample_positions;
563
564 struct radeon_winsys_bo *scratch_bo;
565 struct radeon_winsys_bo *descriptor_bo;
566 struct radeon_winsys_bo *compute_scratch_bo;
567 struct radeon_winsys_bo *esgs_ring_bo;
568 struct radeon_winsys_bo *gsvs_ring_bo;
569 struct radeon_winsys_bo *tess_factor_ring_bo;
570 struct radeon_winsys_bo *tess_offchip_ring_bo;
571 struct radeon_winsys_cs *initial_preamble_cs;
572 struct radeon_winsys_cs *initial_full_flush_preamble_cs;
573 struct radeon_winsys_cs *continue_preamble_cs;
574 };
575
/* Logical device state: allocator, back-pointers to the instance and
 * physical device, meta state, per-family queues, capability flags, MSAA
 * sample locations, debug/trace buffers and the shader slab list.
 */
576 struct radv_device {
577 VK_LOADER_DATA _loader_data;
578
579 VkAllocationCallbacks alloc;
580
581 struct radv_instance * instance;
582 struct radeon_winsys *ws;
583
584 struct radv_meta_state meta_state;
585
/* Per queue family: queue array, its length, and an empty command stream. */
586 struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
587 int queue_count[RADV_MAX_QUEUE_FAMILIES];
588 struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
589
590 bool llvm_supports_spill;
591 bool has_distributed_tess;
592 bool pbb_allowed;
593 bool dfsm_allowed;
594 uint32_t tess_offchip_block_dw_size;
595 uint32_t scratch_waves;
596 uint32_t dispatch_initiator;
597
598 uint32_t gs_table_depth;
599
600 /* MSAA sample locations.
601 * The first index is the sample index.
602 * The second index is the coordinate: X, Y. */
603 float sample_locations_1x[1][2];
604 float sample_locations_2x[2][2];
605 float sample_locations_4x[4][2];
606 float sample_locations_8x[8][2];
607 float sample_locations_16x[16][2];
608
609 /* CIK and later */
610 uint32_t gfx_init_size_dw;
611 struct radeon_winsys_bo *gfx_init;
612
613 struct radeon_winsys_bo *trace_bo;
614 uint32_t *trace_id_ptr;
615
616 /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
617 bool keep_shader_info;
618
619 struct radv_physical_device *physical_device;
620
621 /* Backup in-memory cache to be used if the app doesn't provide one */
622 struct radv_pipeline_cache * mem_cache;
623
624 /*
625 * use different counters so MSAA MRTs get consecutive surface indices,
626 * even if MASK is allocated in between.
626 * NOTE(review): "MASK" presumably means FMASK, given the counter names
626 * below - confirm.
627 */
628 uint32_t image_mrt_offset_counter;
629 uint32_t fmask_mrt_offset_counter;
/* List of shader slabs, with the mutex declared next to it. */
630 struct list_head shader_slabs;
631 mtx_t shader_slab_mutex;
632
633 /* For detecting VM faults reported by dmesg. */
634 uint64_t dmesg_timestamp;
635 };
636
/* A device memory allocation: backing buffer object, optional dedicated
 * image/buffer, memory type index, and the current host mapping.
 */
637 struct radv_device_memory {
638 struct radeon_winsys_bo *bo;
639 /* for dedicated allocations */
640 struct radv_image *image;
641 struct radv_buffer *buffer;
642 uint32_t type_index;
643 VkDeviceSize map_size;
644 void * map;
645 };
646
647
/* An address range (GPU virtual address + size), used for the dynamic
 * descriptors of a descriptor set.
 */
648 struct radv_descriptor_range {
649 uint64_t va;
650 uint32_t size;
651 };
652
/* A descriptor set: its layout, size, backing buffer object with GPU address
 * and CPU mapping, the dynamic descriptor ranges, and a trailing
 * variable-length array of referenced buffer objects.
 */
653 struct radv_descriptor_set {
654 const struct radv_descriptor_set_layout *layout;
655 uint32_t size;
656
657 struct radeon_winsys_bo *bo;
658 uint64_t va;
659 uint32_t *mapped_ptr;
660 struct radv_descriptor_range *dynamic_descriptors;
661
/* Zero-length trailing array (GNU extension). This struct is embedded by
 * value in other structs in this header, so it is not a C99 flexible
 * array member. */
662 struct radeon_winsys_bo *descriptors[0];
663 };
664
/* A descriptor set used for push descriptors, together with a capacity
 * value.
 * NOTE(review): units of `capacity` are not visible here — confirm.
 */
665 struct radv_push_descriptor_set
666 {
667 struct radv_descriptor_set set;
668 uint32_t capacity;
669 };
670
/* Bookkeeping record for one set allocated from a descriptor pool: its
 * offset and size, and the set itself.
 */
671 struct radv_descriptor_pool_entry {
672 uint32_t offset;
673 uint32_t size;
674 struct radv_descriptor_set *set;
675 };
676
/* Descriptor pool: a buffer object with its CPU mapping and an allocation
 * cursor, a host memory region delimited by base/ptr/end pointers, and a
 * trailing array of per-set entries.
 */
677 struct radv_descriptor_pool {
678 struct radeon_winsys_bo *bo;
679 uint8_t *mapped_ptr;
680 uint64_t current_offset;
681 uint64_t size;
682
683 uint8_t *host_memory_base;
684 uint8_t *host_memory_ptr;
685 uint8_t *host_memory_end;
686
/* entries[] holds entry_count used slots out of max_entry_count. */
687 uint32_t entry_count;
688 uint32_t max_entry_count;
689 struct radv_descriptor_pool_entry entries[0];
690 };
691
/* One entry of a descriptor update template: describes where to read the
 * source data and where to write descriptors of one type.
 */
692 struct radv_descriptor_update_template_entry {
693 VkDescriptorType descriptor_type;
694
695 /* The number of descriptors to update */
696 uint32_t descriptor_count;
697
698 /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
699 uint32_t dst_offset;
700
701 /* In dwords. Not valid/used for dynamic descriptors */
702 uint32_t dst_stride;
703
704 uint32_t buffer_offset;
705
706 /* Only valid for combined image samplers and samplers */
707 uint16_t has_sampler;
708
709 /* In bytes */
710 size_t src_offset;
711 size_t src_stride;
712
713 /* For push descriptors */
714 const uint32_t *immutable_samplers;
715 };
716
/* A descriptor update template: entry_count entries stored in the trailing
 * array.
 */
717 struct radv_descriptor_update_template {
718 uint32_t entry_count;
719 struct radv_descriptor_update_template_entry entry[0];
720 };
721
/* A buffer object: size, usage/creation flags, and (once bound) the backing
 * buffer object with the offset into it.
 */
722 struct radv_buffer {
723 struct radv_device * device;
724 VkDeviceSize size;
725
726 VkBufferUsageFlags usage;
727 VkBufferCreateFlags flags;
728
729 /* Set when bound */
730 struct radeon_winsys_bo * bo;
731 VkDeviceSize offset;
732
733 bool shareable;
734 };
735
/* One bit per piece of dynamic state (bits 0-9); RADV_DYNAMIC_ALL is the
 * union of all of them. The low bits of enum radv_cmd_dirty_bits mirror
 * these values.
 */
736 enum radv_dynamic_state_bits {
737 RADV_DYNAMIC_VIEWPORT = 1 << 0,
738 RADV_DYNAMIC_SCISSOR = 1 << 1,
739 RADV_DYNAMIC_LINE_WIDTH = 1 << 2,
740 RADV_DYNAMIC_DEPTH_BIAS = 1 << 3,
741 RADV_DYNAMIC_BLEND_CONSTANTS = 1 << 4,
742 RADV_DYNAMIC_DEPTH_BOUNDS = 1 << 5,
743 RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6,
744 RADV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
745 RADV_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
746 RADV_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
747 RADV_DYNAMIC_ALL = (1 << 10) - 1,
748 };
749
/* Dirty-state bits for command buffer recording. Bits 0-9 are the dynamic
 * state bits (RADV_CMD_DIRTY_DYNAMIC_ALL covers exactly those); the
 * non-dynamic dirty bits start at bit 10.
 */
750 enum radv_cmd_dirty_bits {
751 /* Keep the dynamic state dirty bits in sync with
752 * enum radv_dynamic_state_bits */
753 RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0,
754 RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1,
755 RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2,
756 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3,
757 RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4,
758 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5,
759 RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6,
760 RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
761 RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
762 RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
763 RADV_CMD_DIRTY_DYNAMIC_ALL = (1 << 10) - 1,
764 RADV_CMD_DIRTY_PIPELINE = 1 << 10,
765 RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11,
766 RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12,
767 RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13,
768 };
769
/* Bitmask of pending cache flushes/invalidations and engine
 * synchronization events for a command buffer.
 */
770 enum radv_cmd_flush_bits {
771 RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
772 /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
773 RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
774 /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
775 RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
776 /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
777 RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
778 /* Same as above, but only writes back and doesn't invalidate */
779 RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
780 /* Framebuffer caches */
781 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
782 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
783 RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
784 RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
785 /* Engine synchronization. */
786 RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
787 RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
788 RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
789 RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
790
/* Convenience mask: all four framebuffer cache flush bits. */
791 RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
792 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
793 RADV_CMD_FLAG_FLUSH_AND_INV_DB |
794 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
795 };
796
/* A bound vertex buffer and the offset into it. */
797 struct radv_vertex_binding {
798 struct radv_buffer * buffer;
799 VkDeviceSize offset;
800 };
801
/* Viewport array with capacity MAX_VIEWPORTS plus a count field. */
802 struct radv_viewport_state {
803 uint32_t count;
804 VkViewport viewports[MAX_VIEWPORTS];
805 };
806
/* Scissor array with capacity MAX_SCISSORS plus a count field. */
807 struct radv_scissor_state {
808 uint32_t count;
809 VkRect2D scissors[MAX_SCISSORS];
810 };
811
/* Discard rectangle array with capacity MAX_DISCARD_RECTANGLES plus a count field. */
812 struct radv_discard_rectangle_state {
813 uint32_t count;
814 VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
815 };
816
/* Values of all dynamic state that can be recorded in a command buffer,
 * plus a mask selecting which of them are saved.
 */
817 struct radv_dynamic_state {
818 /**
819 * Bitmask of (1 << VK_DYNAMIC_STATE_*).
820 * Defines the set of saved dynamic state.
820 * NOTE(review): this header also defines enum radv_dynamic_state_bits;
820 * confirm which of the two encodings this mask actually uses.
821 */
822 uint32_t mask;
823
824 struct radv_viewport_state viewport;
825
826 struct radv_scissor_state scissor;
827
828 float line_width;
829
830 struct {
831 float bias;
832 float clamp;
833 float slope;
834 } depth_bias;
835
836 float blend_constants[4];
837
838 struct {
839 float min;
840 float max;
841 } depth_bounds;
842
/* Stencil values are kept separately for front and back faces. */
843 struct {
844 uint32_t front;
845 uint32_t back;
846 } stencil_compare_mask;
847
848 struct {
849 uint32_t front;
850 uint32_t back;
851 } stencil_write_mask;
852
853 struct {
854 uint32_t front;
855 uint32_t back;
856 } stencil_reference;
857
858 struct radv_discard_rectangle_state discard_rectangle;
859 };
860
/* Default dynamic state values; defined in another translation unit. */
861 extern const struct radv_dynamic_state default_dynamic_state;
862
/* Name of the debug option identified by `id`. */
863 const char *
864 radv_get_debug_option_name(int id);
865
/* Name of the perftest option identified by `id`. */
866 const char *
867 radv_get_perftest_option_name(int id);
868
869 /**
870 * Attachment state when recording a renderpass instance.
871 *
872 * The clear value is valid only if there exists a pending clear.
873 */
874 struct radv_attachment_state {
/* Aspects that still have to be cleared. */
875 VkImageAspectFlags pending_clear_aspects;
876 uint32_t cleared_views;
877 VkClearValue clear_value;
878 VkImageLayout current_layout;
879 };
880
/* Recording-time state of a command buffer: bound and last-emitted
 * pipelines, framebuffer and render pass, dynamic state, index buffer
 * binding, dirty/flush bitmasks and cached "last emitted" register values.
 */
881 struct radv_cmd_state {
882 /* Vertex descriptors */
883 bool vb_prefetch_dirty;
884 uint64_t vb_va;
885 unsigned vb_size;
886
887 bool push_descriptors_dirty;
888 bool predicating;
/* NOTE(review): presumably a mask of enum radv_cmd_dirty_bits — confirm. */
889 uint32_t dirty;
890
891 struct radv_pipeline * pipeline;
892 struct radv_pipeline * emitted_pipeline;
893 struct radv_pipeline * compute_pipeline;
894 struct radv_pipeline * emitted_compute_pipeline;
895 struct radv_framebuffer * framebuffer;
896 struct radv_render_pass * pass;
897 const struct radv_subpass * subpass;
898 struct radv_dynamic_state dynamic;
899 struct radv_attachment_state * attachments;
900 VkRect2D render_area;
901
902 /* Index buffer */
903 struct radv_buffer *index_buffer;
904 uint64_t index_offset;
905 uint32_t index_type;
906 uint32_t max_index_count;
907 uint64_t index_va;
908 int32_t last_index_type;
909
910 int32_t last_primitive_reset_en;
911 uint32_t last_primitive_reset_index;
912 enum radv_cmd_flush_bits flush_bits;
913 unsigned active_occlusion_queries;
914 float offset_scale;
915 uint32_t descriptors_dirty;
916 uint32_t valid_descriptors;
917 uint32_t trace_id;
918 uint32_t last_ia_multi_vgt_param;
919
920 uint32_t last_num_instances;
921 uint32_t last_first_instance;
922 uint32_t last_vertex_offset;
923 };
924
/* Command pool: allocator, lists of in-use and free command buffers, and
 * the queue family the pool belongs to.
 */
925 struct radv_cmd_pool {
926 VkAllocationCallbacks alloc;
927 struct list_head cmd_buffers;
928 struct list_head free_cmd_buffers;
929 uint32_t queue_family_index;
930 };
931
/* Upload buffer of a command buffer: CPU mapping, current offset, total
 * size, the backing buffer object and a list link.
 */
932 struct radv_cmd_buffer_upload {
933 uint8_t *map;
934 unsigned offset;
935 uint64_t size;
936 struct radeon_winsys_bo *upload_bo;
937 struct list_head list;
938 };
939
/* Lifecycle state of a command buffer. */
940 enum radv_cmd_buffer_status {
941 RADV_CMD_BUFFER_STATUS_INVALID,
942 RADV_CMD_BUFFER_STATUS_INITIAL,
943 RADV_CMD_BUFFER_STATUS_RECORDING,
944 RADV_CMD_BUFFER_STATUS_EXECUTABLE,
945 RADV_CMD_BUFFER_STATUS_PENDING,
946 };
947
/* A command buffer: owning device and pool, recording status, the winsys
 * command stream, recording state, bound vertex buffers and descriptor
 * sets, push constant storage, the upload buffer, and the resource sizes
 * this command buffer requires.
 */
948 struct radv_cmd_buffer {
949 VK_LOADER_DATA _loader_data;
950
951 struct radv_device * device;
952
953 struct radv_cmd_pool * pool;
954 struct list_head pool_link;
955
956 VkCommandBufferUsageFlags usage_flags;
957 VkCommandBufferLevel level;
958 enum radv_cmd_buffer_status status;
959 struct radeon_winsys_cs *cs;
960 struct radv_cmd_state state;
961 struct radv_vertex_binding vertex_bindings[MAX_VBS];
962 uint32_t queue_family_index;
963
964 uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
/* Four 32-bit words per dynamic buffer.
 * NOTE(review): presumably one buffer descriptor each — confirm. */
965 uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
966 VkShaderStageFlags push_constant_stages;
967 struct radv_push_descriptor_set push_descriptors;
968 struct radv_descriptor_set meta_push_descriptors;
969 struct radv_descriptor_set *descriptors[MAX_SETS];
970
971 struct radv_cmd_buffer_upload upload;
972
/* Requirements of this command buffer; struct radv_queue holds the
 * matching currently-allocated sizes and flags. */
973 uint32_t scratch_size_needed;
974 uint32_t compute_scratch_size_needed;
975 uint32_t esgs_ring_size_needed;
976 uint32_t gsvs_ring_size_needed;
977 bool tess_rings_needed;
978 bool sample_positions_needed;
979
/* Result code kept during recording.
 * NOTE(review): presumably the first error hit while recording — confirm. */
980 VkResult record_result;
981
982 int ring_offsets_idx; /* just used for verification */
983 uint32_t gfx9_fence_offset;
984 struct radeon_winsys_bo *gfx9_fence_bo;
985 uint32_t gfx9_fence_idx;
986 };
987
988 struct radv_image;
989
/* NOTE(review): presumably reports whether the command buffer runs on the
 * compute micro-engine (MEC) — confirm against the definition. */
990 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
991
/* Emit initial compute / graphics configuration into a command buffer. */
992 void si_init_compute(struct radv_cmd_buffer *cmd_buffer);
993 void si_init_config(struct radv_cmd_buffer *cmd_buffer);
994
995 void cik_create_gfx_config(struct radv_device *device);
996
/* Write `count` viewports / scissors starting at the given first index. */
997 void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
998 int count, const VkViewport *viewports);
999 void si_write_scissors(struct radeon_winsys_cs *cs, int first,
1000 int count, const VkRect2D *scissors,
1001 const VkViewport *viewports, bool can_use_guardband);
1002 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
1003 bool instanced_draw, bool indirect_draw,
1004 uint32_t draw_vertex_count);
/* Command-stream level helpers for end-of-pipe events, fence waits and
 * cache flushes; they take the chip class and MEC flag explicitly instead
 * of a command buffer. */
1005 void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
1006 bool predicated,
1007 enum chip_class chip_class,
1008 bool is_mec,
1009 unsigned event, unsigned event_flags,
1010 unsigned data_sel,
1011 uint64_t va,
1012 uint32_t old_fence,
1013 uint32_t new_fence);
1014
1015 void si_emit_wait_fence(struct radeon_winsys_cs *cs,
1016 bool predicated,
1017 uint64_t va, uint32_t ref,
1018 uint32_t mask);
1019 void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
1020 bool predicated,
1021 enum chip_class chip_class,
1022 uint32_t *fence_ptr, uint64_t va,
1023 bool is_mec,
1024 enum radv_cmd_flush_bits flush_bits);
1025 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
1026 void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
/* CP DMA helpers operating on GPU virtual addresses. */
1027 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
1028 uint64_t src_va, uint64_t dest_va,
1029 uint64_t size);
1030 void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
1031 unsigned size);
1032 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
1033 uint64_t size, unsigned value);
1034 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
/*
 * Reserve `size` bytes with the given alignment in the command buffer's
 * upload buffer. On success the offset of the reservation is stored in
 * *out_offset and a CPU pointer to it in *ptr.
 * NOTE(review): presumably returns false when the allocation fails -
 * confirm against the definition.
 */
bool
radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
			     unsigned size,
			     unsigned alignment,
			     unsigned *out_offset,
			     void **ptr);

/* Make `subpass` the current subpass of the command buffer. */
void
radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
			    const struct radv_subpass *subpass,
			    bool transitions);

/*
 * Copy `size` bytes from `data` into the command buffer's upload buffer and
 * store the offset of the copy in *out_offset. Parameters mirror
 * radv_cmd_buffer_upload_alloc.
 */
bool
radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
			    unsigned size, unsigned alignment,
			    const void *data, unsigned *out_offset);
1049
/* Subpass clear and resolve entry points (generic, compute and fragment
 * shader resolve variants). */
1050 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
1051 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
1052 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
1053 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
/* MSAA helpers. */
1054 void radv_cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
1055 unsigned radv_cayman_get_maxdist(int log_samples);
1056 void radv_device_init_msaa(struct radv_device *device);
/* Record the clear values of an image for the given aspects / color index. */
1057 void radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
1058 struct radv_image *image,
1059 VkClearDepthStencilValue ds_clear_value,
1060 VkImageAspectFlags aspects);
1061 void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
1062 struct radv_image *image,
1063 int idx,
1064 uint32_t color_values[2]);
1065 void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
1066 struct radv_image *image,
1067 bool value);
/* Fill `size` bytes of `bo` starting at `offset` with the 32-bit `value`.
 * NOTE(review): meaning of the returned uint32_t is not visible here —
 * presumably flush bits the caller must apply; confirm. */
1068 uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
1069 struct radeon_winsys_bo *bo,
1070 uint64_t offset, uint64_t size, uint32_t value);
1071 void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
/* Export a file descriptor for `memory` into *pFD. */
1072 bool radv_get_memory_fd(struct radv_device *device,
1073 struct radv_device_memory *memory,
1074 int *pFD);
1075
1076 /*
1077 * Takes x,y,z as exact numbers of invocations, instead of blocks.
1078 *
1079 * Limitations: Can't call normal dispatch functions without binding or rebinding
1080 * the compute pipeline.
1081 */
1082 void radv_unaligned_dispatch(
1083 struct radv_cmd_buffer *cmd_buffer,
1084 uint32_t x,
1085 uint32_t y,
1086 uint32_t z);
1087
/*
 * Driver object behind VkEvent (see the handle casts at the end of this
 * header). Holds a winsys buffer object and a pointer to 64-bit data.
 * NOTE(review): `map` is presumably the CPU mapping of `bo` -- confirm.
 */
struct radv_event {
	struct radeon_winsys_bo *bo;
	uint64_t *map;
};
1092
/* Forward declaration; the full definition is not needed at this point. */
struct radv_shader_module;

/* Bit flags accepted by the `flags` argument of radv_hash_shaders(). */
#define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0)
#define RADV_HASH_SHADER_SISCHED (1 << 1)
#define RADV_HASH_SHADER_UNSAFE_MATH (1 << 2)
/*
 * Prototype only. Takes an output byte buffer `hash`, an array of pointers
 * to shader-stage create infos, the pipeline layout, a pipeline key and a
 * mask of RADV_HASH_SHADER_* flags.
 * NOTE(review): the required size of `hash` and the indexing of `stages`
 * are not visible from this header -- confirm at the definition.
 */
void
radv_hash_shaders(unsigned char *hash,
		  const VkPipelineShaderStageCreateInfo **stages,
		  const struct radv_pipeline_layout *layout,
		  const struct radv_pipeline_key *key,
		  uint32_t flags);
1104
1105 static inline gl_shader_stage
1106 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
1107 {
1108 assert(__builtin_popcount(vk_stage) == 1);
1109 return ffs(vk_stage) - 1;
1110 }
1111
1112 static inline VkShaderStageFlagBits
1113 mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
1114 {
1115 return (1 << mesa_stage);
1116 }
1117
/* Mask with one bit set for every gl_shader_stage below MESA_SHADER_STAGES. */
#define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)

/*
 * Loop header that iterates `stage` over every bit set in
 * (stage_bits & RADV_STAGE_MASK), lowest bit first.
 *
 * The for-init declares both `stage` and a hidden working mask `__tmp`.
 * The condition is a comma expression: it first assigns `stage` the index
 * of the lowest set bit of `__tmp`, then tests `__tmp` itself, so the loop
 * ends once every bit has been cleared. The step clears the bit just
 * visited. `stage_bits` is evaluated once.
 */
#define radv_foreach_stage(stage, stage_bits) \
	for (gl_shader_stage stage, \
	     __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK); \
	     stage = __builtin_ffs(__tmp) - 1, __tmp; \
	     __tmp &= ~(1 << (stage)))
1125
/*
 * Depth/stencil part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.ds).
 * NOTE(review): field names follow DB_* hardware register names; presumably
 * each holds the precomputed register value -- confirm.
 */
struct radv_depth_stencil_state {
	uint32_t db_depth_control;
	uint32_t db_stencil_control;
	uint32_t db_render_control;
	uint32_t db_render_override2;
};
1132
/*
 * Blend part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.blend). The two arrays have eight
 * entries each.
 * NOTE(review): presumably one entry per colour target, with fields holding
 * precomputed register values -- confirm.
 */
struct radv_blend_state {
	uint32_t cb_color_control;
	uint32_t cb_target_mask;
	uint32_t sx_mrt_blend_opt[8];
	uint32_t cb_blend_control[8];

	uint32_t spi_shader_col_format;
	uint32_t cb_shader_mask;
	uint32_t db_alpha_to_mask;
};
1143
/* Prototype only: maps a VkFormat to an unsigned key (semantics defined at
 * the definition, not visible here). */
unsigned radv_format_meta_fs_key(VkFormat format);
1145
/*
 * Rasterizer part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.raster).
 * NOTE(review): fields are presumably precomputed register values -- confirm.
 */
struct radv_raster_state {
	uint32_t pa_cl_clip_cntl;
	uint32_t spi_interp_control;
	uint32_t pa_su_vtx_cntl;
	uint32_t pa_su_sc_mode_cntl;
};
1152
/*
 * Multisample part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.ms). Besides the register-named fields
 * it carries the sample count as a plain integer.
 */
struct radv_multisample_state {
	uint32_t db_eqaa;
	uint32_t pa_sc_line_cntl;
	uint32_t pa_sc_mode_cntl_0;
	uint32_t pa_sc_mode_cntl_1;
	uint32_t pa_sc_aa_config;
	uint32_t pa_sc_aa_mask[2];
	unsigned num_samples;
};
1162
/*
 * Pair of 8-bit vertex counts (embedded in struct radv_pipeline as
 * graphics.prim_vertex_count).
 * NOTE(review): presumably `min` is the minimum vertex count for one
 * primitive and `incr` the vertices added per further primitive -- confirm.
 */
struct radv_prim_vertex_count {
	uint8_t min;
	uint8_t incr;
};
1167
/*
 * Tessellation part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.tess).
 * NOTE(review): units of lds_size and the encoding of the *_layout fields
 * are not visible from this header -- confirm where they are filled in.
 */
struct radv_tessellation_state {
	uint32_t ls_hs_config;
	uint32_t tcs_in_layout;
	uint32_t tcs_out_layout;
	uint32_t tcs_out_offsets;
	uint32_t offchip_layout;
	unsigned num_patches;
	unsigned lds_size;
	unsigned num_tcs_input_cp;
	uint32_t tf_param;
};
1179
/*
 * Geometry-shader part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.gs).
 * NOTE(review): fields are presumably precomputed register values; units of
 * lds_size are not visible here -- confirm.
 */
struct radv_gs_state {
	uint32_t vgt_gs_onchip_cntl;
	uint32_t vgt_gs_max_prims_per_subgroup;
	uint32_t vgt_esgs_ring_itemsize;
	uint32_t lds_size;
};
1186
/*
 * Vertex input description: four parallel arrays of MAX_VERTEX_ATTRIBS
 * entries plus a count.
 * NOTE(review): presumably `count` is the number of valid entries and the
 * arrays are indexed by attribute -- confirm where this is filled in.
 */
struct radv_vertex_elements_info {
	uint32_t rsrc_word3[MAX_VERTEX_ATTRIBS];
	uint32_t format_size[MAX_VERTEX_ATTRIBS];
	uint32_t binding[MAX_VERTEX_ATTRIBS];
	uint32_t offset[MAX_VERTEX_ATTRIBS];
	uint32_t count;
};
1194
/*
 * Vertex-shader output part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.vs).
 * NOTE(review): fields are presumably precomputed register values -- confirm.
 */
struct radv_vs_state {
	uint32_t pa_cl_vs_out_cntl;
	uint32_t spi_shader_pos_format;
	uint32_t spi_vs_out_config;
	uint32_t vgt_reuse_off;
};
1201
/*
 * Binning part of the graphics pipeline state (embedded in
 * struct radv_pipeline as graphics.bin).
 * NOTE(review): fields are presumably precomputed register values -- confirm.
 */
struct radv_binning_state {
	uint32_t pa_sc_binner_cntl_0;
	uint32_t db_dfsm_control;
};
1206
/* NOTE(review): meaning not visible in this header; presumably a GS-per-ES
 * hardware limit -- confirm at the use sites. */
#define SI_GS_PER_ES 128
1208
/*
 * Driver object behind VkPipeline (see the handle casts at the end of this
 * header).
 *
 * Holds one shader variant pointer per shader stage plus a separate GS copy
 * shader, the vertex input layout, and the precomputed graphics state. The
 * anonymous union currently has a single member, `graphics`.
 */
struct radv_pipeline {
	struct radv_device * device;
	struct radv_dynamic_state dynamic_state;

	struct radv_pipeline_layout * layout;

	bool needs_data_cache;
	bool need_indirect_descriptor_sets;
	/* Indexed by gl_shader_stage; a NULL entry is what
	 * radv_pipeline_has_gs()/radv_pipeline_has_tess() test for. */
	struct radv_shader_variant * shaders[MESA_SHADER_STAGES];
	struct radv_shader_variant *gs_copy_shader;
	VkShaderStageFlags active_stages;

	struct radv_vertex_elements_info vertex_elements;

	uint32_t binding_stride[MAX_VBS];

	uint32_t user_data_0[MESA_SHADER_STAGES];
	union {
		struct {
			struct radv_blend_state blend;
			struct radv_depth_stencil_state ds;
			struct radv_raster_state raster;
			struct radv_multisample_state ms;
			struct radv_tessellation_state tess;
			struct radv_gs_state gs;
			struct radv_vs_state vs;
			struct radv_binning_state bin;
			uint32_t db_shader_control;
			uint32_t shader_z_format;
			unsigned prim;
			unsigned gs_out;
			uint32_t vgt_gs_mode;
			bool vgt_primitiveid_en;
			bool prim_restart_enable;
			bool partial_es_wave;
			uint8_t primgroup_size;
			unsigned esgs_ring_size;
			unsigned gsvs_ring_size;
			/* NOTE(review): presumably ps_input_cntl_num is the
			 * number of valid ps_input_cntl entries -- confirm. */
			uint32_t ps_input_cntl[32];
			uint32_t ps_input_cntl_num;
			uint32_t vgt_shader_stages_en;
			uint32_t vtx_base_sgpr;
			uint32_t base_ia_multi_vgt_param;
			bool wd_switch_on_eop;
			bool ia_switch_on_eoi;
			bool partial_vs_wave;
			uint8_t vtx_emit_num;
			uint32_t vtx_reuse_depth;
			struct radv_prim_vertex_count prim_vertex_count;
			bool can_use_guardband;
			uint32_t pa_sc_cliprect_rule;
		} graphics;
	};

	unsigned max_waves;
	unsigned scratch_bytes_per_wave;
};
1266
1267 static inline bool radv_pipeline_has_gs(struct radv_pipeline *pipeline)
1268 {
1269 return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
1270 }
1271
1272 static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline)
1273 {
1274 return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
1275 }
1276
/*
 * Prototypes only; definitions live in other translation units.
 *
 * radv_lookup_user_sgpr returns a pointer to an ac_userdata_info selected by
 * pipeline, shader stage and an integer index (index space not visible
 * here -- TODO confirm at the definition).
 */
struct ac_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
					       gl_shader_stage stage,
					       int idx);

/* Returns a shader variant for the given pipeline.
 * NOTE(review): which stage slot is chosen when tessellation or geometry
 * shaders are present is not visible here -- confirm at the definition. */
struct radv_shader_variant *radv_get_vertex_shader(struct radv_pipeline *pipeline);
1282
/*
 * Driver-specific creation options, passed to
 * radv_graphics_pipeline_create() as its `extra` argument alongside the
 * standard VkGraphicsPipelineCreateInfo.
 * NOTE(review): presumably used by internal (meta) pipelines only -- confirm
 * at the call sites.
 */
struct radv_graphics_pipeline_create_info {
	bool use_rectlist;
	bool db_depth_clear;
	bool db_stencil_clear;
	bool db_depth_disable_expclear;
	bool db_stencil_disable_expclear;
	bool db_flush_depth_inplace;
	bool db_flush_stencil_inplace;
	bool db_resummarize;
	uint32_t custom_blend_mode;
};
1294
/*
 * Prototype only. Same shape as a Vulkan pipeline-creation call, with one
 * additional driver-specific argument `extra`; the created pipeline is
 * returned through pPipeline and the status as a VkResult.
 * NOTE(review): whether `extra` may be NULL is not visible here -- confirm
 * at the definition.
 */
VkResult
radv_graphics_pipeline_create(VkDevice device,
			      VkPipelineCache cache,
			      const VkGraphicsPipelineCreateInfo *pCreateInfo,
			      const struct radv_graphics_pipeline_create_info *extra,
			      const VkAllocationCallbacks *alloc,
			      VkPipeline *pPipeline);
1302
/* Forward declaration; only pointers to it are used below. */
struct vk_format_description;

/*
 * Format translation and query helpers; prototypes only.
 *
 * The radv_translate_* functions take a VkFormat and/or a format description
 * (some also take a `first_non_void` channel index) and return an unsigned
 * value.
 * NOTE(review): presumably the returned values are hardware format
 * encodings -- confirm at the definitions.
 */
uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc,
					  int first_non_void);
uint32_t radv_translate_buffer_numformat(const struct vk_format_description *desc,
					 int first_non_void);
uint32_t radv_translate_colorformat(VkFormat format);
uint32_t radv_translate_color_numformat(VkFormat format,
					const struct vk_format_description *desc,
					int first_non_void);
uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
uint32_t radv_translate_dbformat(VkFormat format);
uint32_t radv_translate_tex_dataformat(VkFormat format,
				       const struct vk_format_description *desc,
				       int first_non_void);
uint32_t radv_translate_tex_numformat(VkFormat format,
				      const struct vk_format_description *desc,
				      int first_non_void);
/* Takes a clear colour and a two-dword array `clear_vals`; returns a bool.
 * NOTE(review): presumably packs *value into clear_vals and returns false
 * for unsupported formats -- confirm at the definition. */
bool radv_format_pack_clear_color(VkFormat format,
				  uint32_t clear_vals[2],
				  VkClearColorValue *value);
/* Returns a bool and has a second bool output parameter `blendable`. */
bool radv_is_colorbuffer_format_supported(VkFormat format, bool *blendable);
bool radv_dcc_formats_compatible(VkFormat format1,
				 VkFormat format2);
1327
/*
 * FMASK surface description (embedded in struct radv_image as `fmask`).
 * NOTE(review): presumably `offset` is relative to the image's memory
 * binding and `size`/`alignment` are in bytes -- confirm where filled in.
 */
struct radv_fmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};
1338
/*
 * CMASK surface description (embedded in struct radv_image as `cmask`).
 * NOTE(review): presumably `offset` is relative to the image's memory
 * binding and `size`/`alignment` are in bytes -- confirm where filled in.
 */
struct radv_cmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
};
1345
/*
 * Driver object behind VkImage (see the handle casts at the end of this
 * header). Combines the client-visible creation parameters, the memory
 * binding, and the layout of the surface and its metadata.
 */
struct radv_image {
	VkImageType type;
	/* The original VkFormat provided by the client. This may not match any
	 * of the actual surface formats.
	 */
	VkFormat vk_format;
	VkImageAspectFlags aspects;
	VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
	struct ac_surf_info info;
	VkImageTiling tiling; /**< VkImageCreateInfo::tiling */
	VkImageCreateFlags flags; /**< VkImageCreateInfo::flags */

	VkDeviceSize size;
	uint32_t alignment;

	unsigned queue_family_mask;
	bool exclusive;
	bool shareable;

	/* Set when bound */
	struct radeon_winsys_bo *bo;
	VkDeviceSize offset;
	/* NOTE(review): the *_offset fields below are presumably byte offsets
	 * relative to the image's start within `bo` -- confirm. */
	uint64_t dcc_offset;
	uint64_t htile_offset;
	bool tc_compatible_htile;
	struct radeon_surf surface;

	struct radv_fmask_info fmask;
	struct radv_cmask_info cmask;
	uint64_t clear_value_offset;
	uint64_t dcc_pred_offset;
};
1378
/* Whether the image has a htile that is known to be consistent with the
 * contents of the image. */
bool radv_layout_has_htile(const struct radv_image *image,
			   VkImageLayout layout,
			   unsigned queue_mask);

/* Whether the image has a htile that is known to be consistent with the
 * contents of the image and is allowed to be in compressed form.
 *
 * If this is false, reads that don't use the htile should still be able to
 * return correct results.
 */
bool radv_layout_is_htile_compressed(const struct radv_image *image,
				     VkImageLayout layout,
				     unsigned queue_mask);

/* Layout queries for fast clears and DCC compression; like the two above
 * they take the image, its layout and a queue mask. Exact criteria live at
 * the definitions. */
bool radv_layout_can_fast_clear(const struct radv_image *image,
				VkImageLayout layout,
				unsigned queue_mask);

bool radv_layout_dcc_compressed(const struct radv_image *image,
				VkImageLayout layout,
				unsigned queue_mask);
1402
1403 static inline bool
1404 radv_vi_dcc_enabled(const struct radv_image *image, unsigned level)
1405 {
1406 return image->surface.dcc_size && level < image->surface.num_dcc_levels;
1407 }
1408
1409 static inline bool
1410 radv_htile_enabled(const struct radv_image *image, unsigned level)
1411 {
1412 return image->surface.htile_size && level == 0;
1413 }
1414
/* Prototype only: returns an unsigned mask computed from the image and two
 * queue family indices (exact rule defined at the definition). */
unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
1416
1417 static inline uint32_t
1418 radv_get_layerCount(const struct radv_image *image,
1419 const VkImageSubresourceRange *range)
1420 {
1421 return range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
1422 image->info.array_size - range->baseArrayLayer : range->layerCount;
1423 }
1424
1425 static inline uint32_t
1426 radv_get_levelCount(const struct radv_image *image,
1427 const VkImageSubresourceRange *range)
1428 {
1429 return range->levelCount == VK_REMAINING_MIP_LEVELS ?
1430 image->info.levels - range->baseMipLevel : range->levelCount;
1431 }
1432
/* Forward declaration; only a pointer to it is used below. */
struct radeon_bo_metadata;
/* Prototype only. Takes a device, an image and a metadata structure.
 * NOTE(review): presumably fills *metadata from the image's surface layout
 * -- confirm at the definition. */
void
radv_init_metadata(struct radv_device *device,
		   struct radv_image *image,
		   struct radeon_bo_metadata *metadata);
1438
/*
 * Driver object behind VkImageView (see the handle casts at the end of this
 * header). Records the viewed image, the selected subresource range and two
 * 16-dword descriptors.
 */
struct radv_image_view {
	struct radv_image *image; /**< VkImageViewCreateInfo::image */
	struct radeon_winsys_bo *bo;

	VkImageViewType type;
	VkImageAspectFlags aspect_mask;
	VkFormat vk_format;
	uint32_t base_layer;
	uint32_t layer_count;
	uint32_t base_mip;
	uint32_t level_count;
	VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

	/* Descriptor for use as a sampled image (see storage_descriptor for
	 * the storage-image counterpart). */
	uint32_t descriptor[16];

	/* Descriptor for use as a storage image as opposed to a sampled image.
	 * This has a few differences for cube maps (e.g. type).
	 */
	uint32_t storage_descriptor[16];
};
1459
/*
 * Argument bundle for radv_image_create(): the standard Vulkan create info
 * plus a driver-specific `scanout` flag.
 * NOTE(review): presumably `scanout` marks images intended for display --
 * confirm at the definition of radv_image_create().
 */
struct radv_image_create_info {
	const VkImageCreateInfo *vk_info;
	bool scanout;
};
1464
/* Prototype only. Creates an image from a radv_image_create_info; the new
 * handle is returned through pImage and the status as a VkResult. */
VkResult radv_image_create(VkDevice _device,
			   const struct radv_image_create_info *info,
			   const VkAllocationCallbacks* alloc,
			   VkImage *pImage);

/* Prototype only. Initializes a caller-provided radv_image_view from a
 * VkImageViewCreateInfo; returns nothing. */
void radv_image_view_init(struct radv_image_view *view,
			  struct radv_device *device,
			  const VkImageViewCreateInfo* pCreateInfo);
1473
/*
 * Driver object behind VkBufferView (see the handle casts at the end of this
 * header): the backing buffer object, the view format and range, and a
 * four-dword `state` array (presumably the hardware buffer descriptor --
 * confirm where it is filled in).
 */
struct radv_buffer_view {
	struct radeon_winsys_bo *bo;
	VkFormat vk_format;
	uint64_t range; /**< VkBufferViewCreateInfo::range */
	uint32_t state[4];
};
/* Prototype only. Initializes a caller-provided radv_buffer_view from a
 * VkBufferViewCreateInfo; returns nothing. */
void radv_buffer_view_init(struct radv_buffer_view *view,
			   struct radv_device *device,
			   const VkBufferViewCreateInfo* pCreateInfo);
1483
1484 static inline struct VkExtent3D
1485 radv_sanitize_image_extent(const VkImageType imageType,
1486 const struct VkExtent3D imageExtent)
1487 {
1488 switch (imageType) {
1489 case VK_IMAGE_TYPE_1D:
1490 return (VkExtent3D) { imageExtent.width, 1, 1 };
1491 case VK_IMAGE_TYPE_2D:
1492 return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
1493 case VK_IMAGE_TYPE_3D:
1494 return imageExtent;
1495 default:
1496 unreachable("invalid image type");
1497 }
1498 }
1499
1500 static inline struct VkOffset3D
1501 radv_sanitize_image_offset(const VkImageType imageType,
1502 const struct VkOffset3D imageOffset)
1503 {
1504 switch (imageType) {
1505 case VK_IMAGE_TYPE_1D:
1506 return (VkOffset3D) { imageOffset.x, 0, 0 };
1507 case VK_IMAGE_TYPE_2D:
1508 return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
1509 case VK_IMAGE_TYPE_3D:
1510 return imageOffset;
1511 default:
1512 unreachable("invalid image type");
1513 }
1514 }
1515
1516 static inline bool
1517 radv_image_extent_compare(const struct radv_image *image,
1518 const VkExtent3D *extent)
1519 {
1520 if (extent->width != image->info.width ||
1521 extent->height != image->info.height ||
1522 extent->depth != image->info.depth)
1523 return false;
1524 return true;
1525 }
1526
/* Driver object behind VkSampler (see the handle casts at the end of this
 * header): four dwords of state (presumably the hardware sampler
 * descriptor -- confirm where it is filled in). */
struct radv_sampler {
	uint32_t state[4];
};
1530
/*
 * Colour attachment state (the `cb` member of struct radv_attachment_info).
 * The four 64-bit fields come first, followed by 32-bit fields.
 * NOTE(review): field names follow CB_* hardware register names; presumably
 * the 64-bit ones hold addresses and the rest register values -- confirm.
 */
struct radv_color_buffer_info {
	uint64_t cb_color_base;
	uint64_t cb_color_cmask;
	uint64_t cb_color_fmask;
	uint64_t cb_dcc_base;
	uint32_t cb_color_pitch;
	uint32_t cb_color_slice;
	uint32_t cb_color_view;
	uint32_t cb_color_info;
	uint32_t cb_color_attrib;
	uint32_t cb_color_attrib2;
	uint32_t cb_dcc_control;
	uint32_t cb_color_cmask_slice;
	uint32_t cb_color_fmask_slice;
};
1546
/*
 * Depth/stencil attachment state (the `ds` member of
 * struct radv_attachment_info).
 * NOTE(review): field names follow DB_* / PA_* hardware register names;
 * presumably the 64-bit ones hold addresses and the 32-bit ones register
 * values. The purpose of offset_scale is not visible here -- confirm.
 */
struct radv_ds_buffer_info {
	uint64_t db_z_read_base;
	uint64_t db_stencil_read_base;
	uint64_t db_z_write_base;
	uint64_t db_stencil_write_base;
	uint64_t db_htile_data_base;
	uint32_t db_depth_info;
	uint32_t db_z_info;
	uint32_t db_stencil_info;
	uint32_t db_depth_view;
	uint32_t db_depth_size;
	uint32_t db_depth_slice;
	uint32_t db_htile_surface;
	uint32_t pa_su_poly_offset_db_fmt_cntl;
	uint32_t db_z_info2;
	uint32_t db_stencil_info2;
	float offset_scale;
};
1565
/*
 * One framebuffer attachment: the image view plus either colour-buffer or
 * depth/stencil state. `cb` and `ds` share storage in an anonymous union, so
 * only one of them is meaningful for a given attachment.
 */
struct radv_attachment_info {
	union {
		struct radv_color_buffer_info cb;
		struct radv_ds_buffer_info ds;
	};
	struct radv_image_view *attachment;
};
1573
/*
 * Driver object behind VkFramebuffer (see the handle casts at the end of
 * this header). `attachments` is a zero-length trailing array, so the
 * attachment records are stored directly after the fixed fields.
 * NOTE(review): presumably attachment_count gives the number of trailing
 * entries the allocation provides -- confirm at the creation site.
 */
struct radv_framebuffer {
	uint32_t width;
	uint32_t height;
	uint32_t layers;

	uint32_t attachment_count;
	struct radv_attachment_info attachments[0];
};
1582
/*
 * Barrier description made of a source stage mask and source/destination
 * access masks. Used as radv_subpass::start_barrier and
 * radv_render_pass::end_barrier.
 */
struct radv_subpass_barrier {
	VkPipelineStageFlags src_stage_mask;
	VkAccessFlags src_access_mask;
	VkAccessFlags dst_access_mask;
};
1588
/*
 * One subpass of a render pass: attachment reference arrays for inputs,
 * colours and resolves, a single depth/stencil reference, the barrier that
 * starts the subpass and a view mask.
 * NOTE(review): presumably input_count/color_count give the lengths of
 * input_attachments/color_attachments, and resolve_attachments parallels
 * color_attachments -- confirm at the creation site.
 */
struct radv_subpass {
	uint32_t input_count;
	uint32_t color_count;
	VkAttachmentReference * input_attachments;
	VkAttachmentReference * color_attachments;
	VkAttachmentReference * resolve_attachments;
	VkAttachmentReference depth_stencil_attachment;

	/** Subpass has at least one resolve attachment */
	bool has_resolve;

	struct radv_subpass_barrier start_barrier;

	uint32_t view_mask;
};
1604
/*
 * Per-attachment description stored in a render pass: format, sample count,
 * colour/depth and stencil load operations, initial and final layouts, and a
 * view mask.
 */
struct radv_render_pass_attachment {
	VkFormat format;
	uint32_t samples;
	VkAttachmentLoadOp load_op;
	VkAttachmentLoadOp stencil_load_op;
	VkImageLayout initial_layout;
	VkImageLayout final_layout;
	uint32_t view_mask;
};
1614
/*
 * Driver object behind VkRenderPass (see the handle casts at the end of this
 * header). `subpasses` is a zero-length trailing array, so the subpass
 * records are stored directly after the fixed fields.
 * NOTE(review): presumably subpass_count gives the number of trailing
 * entries and subpass_attachments is the backing store for the per-subpass
 * attachment reference arrays -- confirm at the creation site.
 */
struct radv_render_pass {
	uint32_t attachment_count;
	uint32_t subpass_count;
	VkAttachmentReference * subpass_attachments;
	struct radv_render_pass_attachment * attachments;
	struct radv_subpass_barrier end_barrier;
	struct radv_subpass subpasses[0];
};
1623
/* Prototypes only: init/finish pair for the device's meta state. Init
 * reports its status as a VkResult; finish returns nothing. */
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
1626
/*
 * Driver object behind VkQueryPool (see the handle casts at the end of this
 * header).
 * NOTE(review): presumably `stride` is the per-query size in bytes,
 * `availability_offset` locates availability data inside `bo`, and `ptr` is
 * a CPU mapping of `bo` -- confirm at the creation site.
 */
struct radv_query_pool {
	struct radeon_winsys_bo *bo;
	uint32_t stride;
	uint32_t availability_offset;
	char *ptr;
	VkQueryType type;
	uint32_t pipeline_stats_mask;
};
1635
/*
 * Driver object behind VkSemaphore (see the handle casts at the end of this
 * header). Carries a winsys semaphore and two syncobj handles.
 * NOTE(review): presumably the syncobj fields are used for exportable
 * semaphores, with temp_syncobj holding a temporary import -- confirm.
 */
struct radv_semaphore {
	/* use a winsys sem for non-exportable */
	struct radeon_winsys_sem *sem;
	uint32_t syncobj;
	uint32_t temp_syncobj;
};
1642
/*
 * Prototypes only: alloc/free pair for a radv_winsys_sem_info.
 * The alloc call takes arrays of wait and signal semaphores with their
 * counts plus a fence, and reports its status as a VkResult.
 */
VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
			     int num_wait_sems,
			     const VkSemaphore *wait_sems,
			     int num_signal_sems,
			     const VkSemaphore *signal_sems,
			     VkFence fence);
void radv_free_sem_info(struct radv_winsys_sem_info *sem_info);
1650
/*
 * Descriptor set helpers; prototypes only, definitions live in other
 * translation units.
 */

/* Takes a command buffer, a descriptor set and an index `idx`. */
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
			     struct radv_descriptor_set *set,
			     unsigned idx);

/* Same write/copy arguments as a Vulkan descriptor update, plus a device, a
 * command buffer and an `overrideSet` handle.
 * NOTE(review): presumably overrideSet, when set, replaces the destination
 * set named in each write -- confirm at the definition. */
void
radv_update_descriptor_sets(struct radv_device *device,
			    struct radv_cmd_buffer *cmd_buffer,
			    VkDescriptorSet overrideSet,
			    uint32_t descriptorWriteCount,
			    const VkWriteDescriptorSet *pDescriptorWrites,
			    uint32_t descriptorCopyCount,
			    const VkCopyDescriptorSet *pDescriptorCopies);

/* Updates `set` using an update template and a raw data pointer. */
void
radv_update_descriptor_set_with_template(struct radv_device *device,
					 struct radv_cmd_buffer *cmd_buffer,
					 struct radv_descriptor_set *set,
					 VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
					 const void *pData);

/* Push-descriptor entry point taking a bind point, pipeline layout, set
 * number and an array of descriptor writes. */
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
				   VkPipelineBindPoint pipelineBindPoint,
				   VkPipelineLayout _layout,
				   uint32_t set,
				   uint32_t descriptorWriteCount,
				   const VkWriteDescriptorSet *pDescriptorWrites);
1677
/*
 * Prototypes only. Both take a command buffer, an image and a 32-bit value.
 * Note the differing spellings ("initialise" vs "initialize"); they are part
 * of the public names, so callers must match them exactly.
 */
void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
			   struct radv_image *image, uint32_t value);
void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
			 struct radv_image *image, uint32_t value);
1682
/*
 * Driver object behind VkFence (see the handle casts at the end of this
 * header). Carries a winsys fence, two status flags and two syncobj handles.
 * NOTE(review): presumably `submitted`/`signalled` track the fence's
 * lifecycle and temp_syncobj holds a temporary import -- confirm.
 */
struct radv_fence {
	struct radeon_winsys_fence *fence;
	bool submitted;
	bool signalled;

	uint32_t syncobj;
	uint32_t temp_syncobj;
};
1691
/* Forward declaration of the winsys semaphore type (struct radv_semaphore
 * refers to it by pointer). */
struct radeon_winsys_sem;
1693
/*
 * Generate two static inline conversion functions for a handle type:
 *
 *   struct <radv_type> *<radv_type>_from_handle(<VkType> handle);
 *   <VkType> <radv_type>_to_handle(struct <radv_type> *obj);
 *
 * Both are plain casts between the handle and the struct pointer. The
 * expansion ends with a function body, so an invocation needs no trailing
 * semicolon.
 */
#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \
	\
	static inline struct __radv_type * \
	__radv_type ## _from_handle(__VkType _handle) \
	{ \
		return (struct __radv_type *) _handle; \
	} \
	\
	static inline __VkType \
	__radv_type ## _to_handle(struct __radv_type *_obj) \
	{ \
		return (__VkType) _obj; \
	}
1707
/*
 * Same as RADV_DEFINE_HANDLE_CASTS, except that both conversions go through
 * an intermediate uintptr_t cast. This keeps the cast valid when the handle
 * type is an integer rather than a pointer.
 * NOTE(review): presumably needed because Vulkan non-dispatchable handles
 * may be 64-bit integers on some platforms -- confirm against the Vulkan
 * headers.
 */
#define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType) \
	\
	static inline struct __radv_type * \
	__radv_type ## _from_handle(__VkType _handle) \
	{ \
		return (struct __radv_type *)(uintptr_t) _handle; \
	} \
	\
	static inline __VkType \
	__radv_type ## _to_handle(struct __radv_type *_obj) \
	{ \
		return (__VkType)(uintptr_t) _obj; \
	}
1721
/*
 * Declare a local variable `__name` of type `struct __radv_type *` and
 * initialize it from `__handle` via the matching <radv_type>_from_handle()
 * function. Expands to a declaration, so the caller supplies the semicolon.
 */
#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
	struct __radv_type *__name = __radv_type ## _from_handle(__handle)
1724
/* Cast helpers generated with RADV_DEFINE_HANDLE_CASTS (direct pointer
 * casts): each line defines <radv_type>_from_handle() and
 * <radv_type>_to_handle(). */
RADV_DEFINE_HANDLE_CASTS(radv_cmd_buffer, VkCommandBuffer)
RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice)
RADV_DEFINE_HANDLE_CASTS(radv_instance, VkInstance)
RADV_DEFINE_HANDLE_CASTS(radv_physical_device, VkPhysicalDevice)
RADV_DEFINE_HANDLE_CASTS(radv_queue, VkQueue)
1730
1731 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_cmd_pool, VkCommandPool)
1732 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer, VkBuffer)
1733 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_buffer_view, VkBufferView)
1734 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_pool, VkDescriptorPool)
1735 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set, VkDescriptorSet)
1736 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_set_layout, VkDescriptorSetLayout)
1737 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_descriptor_update_template, VkDescriptorUpdateTemplateKHR)
1738 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_device_memory, VkDeviceMemory)
1739 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_fence, VkFence)
1740 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_event, VkEvent)
1741 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_framebuffer, VkFramebuffer)
1742 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_image, VkImage)
1743 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_image_view, VkImageView);
1744 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_cache, VkPipelineCache)
1745 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, VkPipeline)
1746 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline_layout, VkPipelineLayout)
1747 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool)
1748 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass)
1749 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
1750 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)
1751 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore)
1752
1753 #endif /* RADV_PRIVATE_H */