turnip: input attachment descriptor set rework
[mesa.git] / src / freedreno / vulkan / tu_private.h
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 * DEALINGS IN THE SOFTWARE.
26 */
27
28 #ifndef TU_PRIVATE_H
29 #define TU_PRIVATE_H
30
31 #include <assert.h>
32 #include <pthread.h>
33 #include <stdbool.h>
34 #include <stdint.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #ifdef HAVE_VALGRIND
39 #include <memcheck.h>
40 #include <valgrind.h>
41 #define VG(x) x
42 #else
43 #define VG(x) ((void)0)
44 #endif
45
46 #include "c11/threads.h"
47 #include "main/macros.h"
48 #include "util/list.h"
49 #include "util/macros.h"
50 #include "vk_alloc.h"
51 #include "vk_debug_report.h"
52 #include "wsi_common.h"
53
54 #include "drm-uapi/msm_drm.h"
55 #include "ir3/ir3_compiler.h"
56 #include "ir3/ir3_shader.h"
57
58 #include "adreno_common.xml.h"
59 #include "adreno_pm4.xml.h"
60 #include "a6xx.xml.h"
61 #include "fdl/freedreno_layout.h"
62
63 #include "tu_descriptor_set.h"
64 #include "tu_extensions.h"
65
66 /* Pre-declarations needed for WSI entrypoints */
67 struct wl_surface;
68 struct wl_display;
69 typedef struct xcb_connection_t xcb_connection_t;
70 typedef uint32_t xcb_visualid_t;
71 typedef uint32_t xcb_window_t;
72
73 #include <vulkan/vk_android_native_buffer.h>
74 #include <vulkan/vk_icd.h>
75 #include <vulkan/vulkan.h>
76 #include <vulkan/vulkan_intel.h>
77
78 #include "tu_entrypoints.h"
79
80 #include "vk_format.h"
81
82 #define MAX_VBS 32
83 #define MAX_VERTEX_ATTRIBS 32
84 #define MAX_RTS 8
85 #define MAX_VSC_PIPES 32
86 #define MAX_VIEWPORTS 1
87 #define MAX_SCISSORS 16
88 #define MAX_DISCARD_RECTANGLES 4
89 #define MAX_PUSH_CONSTANTS_SIZE 128
90 #define MAX_PUSH_DESCRIPTORS 32
91 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
92 #define MAX_DYNAMIC_STORAGE_BUFFERS 8
93 #define MAX_DYNAMIC_BUFFERS \
94 (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
95 #define TU_MAX_DRM_DEVICES 8
96 #define MAX_VIEWS 8
97 #define MAX_BIND_POINTS 2 /* compute + graphics */
98 /* The Qualcomm driver exposes 0x20000058 */
99 #define MAX_STORAGE_BUFFER_RANGE 0x20000000
100 /* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
101 * expose the same maximum range.
102 * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
103 * range might be higher (up to 2^15 * 16 bytes = 0x80000).
104 */
105 #define MAX_UNIFORM_BUFFER_RANGE 0x10000
106
107 #define A6XX_TEX_CONST_DWORDS 16
108 #define A6XX_TEX_SAMP_DWORDS 4
109
110 #define tu_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
111
112 static inline uint32_t
113 tu_minify(uint32_t n, uint32_t levels)
114 {
115 if (unlikely(n == 0))
116 return 0;
117 else
118 return MAX2(n >> levels, 1);
119 }
120
121 #define for_each_bit(b, dword) \
122 for (uint32_t __dword = (dword); \
123 (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b)))
124
125 #define typed_memcpy(dest, src, count) \
126 ({ \
127 STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
128 memcpy((dest), (src), (count) * sizeof(*(src))); \
129 })
130
131 #define COND(bool, val) ((bool) ? (val) : 0)
132 #define BIT(bit) (1u << (bit))
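
/* Usage sketch (hypothetical caller, not from this file): for_each_bit walks
 * the set bits of a 32-bit mask, assigning each set bit's index to 'b':
 *
 *    uint32_t b;
 *    for_each_bit(b, cmd->state.dirty) {
 *       handle_dirty_bit(cmd, b);   // hypothetical helper
 *    }
 */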
133
134 /* Whenever we generate an error, pass it through this function. Useful for
135 * debugging, where we can break on it. Only call at error site, not when
136 * propagating errors. Might be useful to plug in a stack trace here.
137 */
138
139 struct tu_instance;
140
141 VkResult
142 __vk_errorf(struct tu_instance *instance,
143 VkResult error,
144 const char *file,
145 int line,
146 const char *format,
147 ...);
148
149 #define vk_error(instance, error) \
150 __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
151 #define vk_errorf(instance, error, format, ...) \
152 __vk_errorf(instance, error, __FILE__, __LINE__, format, ##__VA_ARGS__);
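
/* Usage sketch (hypothetical call site): errors are reported at the point
 * where they first occur, so a debugger breakpoint on __vk_errorf catches
 * them; e.g. on an allocation failure:
 *
 *    if (!mem)
 *       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 */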
153
154 void
155 __tu_finishme(const char *file, int line, const char *format, ...)
156 tu_printflike(3, 4);
157 void
158 tu_loge(const char *format, ...) tu_printflike(1, 2);
159 void
160 tu_logi(const char *format, ...) tu_printflike(1, 2);
161
162 /**
163 * Print a FINISHME message, including its source location.
164 */
165 #define tu_finishme(format, ...) \
166 do { \
167 static bool reported = false; \
168 if (!reported) { \
169 __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
170 reported = true; \
171 } \
172 } while (0)
173
174 #define tu_stub() \
175 do { \
176 tu_finishme("stub %s", __func__); \
177 } while (0)
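
/* Usage sketch (hypothetical call sites): tu_finishme() logs a missing
 * feature once per call site, tu_stub() marks an unimplemented entry point:
 *
 *    tu_finishme("unhandled format %d", format);
 *    tu_stub();
 */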
178
179 void *
180 tu_lookup_entrypoint_unchecked(const char *name);
181 void *
182 tu_lookup_entrypoint_checked(
183 const char *name,
184 uint32_t core_version,
185 const struct tu_instance_extension_table *instance,
186 const struct tu_device_extension_table *device);
187
188 struct tu_physical_device
189 {
190 VK_LOADER_DATA _loader_data;
191
192 struct tu_instance *instance;
193
194 char path[20];
195 char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
196 uint8_t driver_uuid[VK_UUID_SIZE];
197 uint8_t device_uuid[VK_UUID_SIZE];
198 uint8_t cache_uuid[VK_UUID_SIZE];
199
200 struct wsi_device wsi_device;
201
202 int local_fd;
203 int master_fd;
204
205 unsigned gpu_id;
206 uint32_t gmem_size;
207 uint64_t gmem_base;
208 uint32_t ccu_offset_gmem;
209 uint32_t ccu_offset_bypass;
210 /* alignment for size of tiles */
211 uint32_t tile_align_w;
212 #define TILE_ALIGN_H 16
213 /* gmem store/load granularity */
214 #define GMEM_ALIGN_W 16
215 #define GMEM_ALIGN_H 4
216
217 struct {
218 uint32_t PC_UNKNOWN_9805;
219 uint32_t SP_UNKNOWN_A0F8;
220 } magic;
221
222 /* This is the driver's on-disk cache, used as a fallback as opposed to
223 * the pipeline cache defined by apps.
224 */
225 struct disk_cache *disk_cache;
226
227 struct tu_device_extension_table supported_extensions;
228 };
229
230 enum tu_debug_flags
231 {
232 TU_DEBUG_STARTUP = 1 << 0,
233 TU_DEBUG_NIR = 1 << 1,
234 TU_DEBUG_IR3 = 1 << 2,
235 TU_DEBUG_NOBIN = 1 << 3,
236 TU_DEBUG_SYSMEM = 1 << 4,
237 TU_DEBUG_FORCEBIN = 1 << 5,
238 TU_DEBUG_NOUBWC = 1 << 6,
239 };
240
241 struct tu_instance
242 {
243 VK_LOADER_DATA _loader_data;
244
245 VkAllocationCallbacks alloc;
246
247 uint32_t api_version;
248 int physical_device_count;
249 struct tu_physical_device physical_devices[TU_MAX_DRM_DEVICES];
250
251 enum tu_debug_flags debug_flags;
252
253 struct vk_debug_report_instance debug_report_callbacks;
254
255 struct tu_instance_extension_table enabled_extensions;
256 };
257
258 VkResult
259 tu_wsi_init(struct tu_physical_device *physical_device);
260 void
261 tu_wsi_finish(struct tu_physical_device *physical_device);
262
263 bool
264 tu_instance_extension_supported(const char *name);
265 uint32_t
266 tu_physical_device_api_version(struct tu_physical_device *dev);
267 bool
268 tu_physical_device_extension_supported(struct tu_physical_device *dev,
269 const char *name);
270
271 struct cache_entry;
272
273 struct tu_pipeline_cache
274 {
275 struct tu_device *device;
276 pthread_mutex_t mutex;
277
278 uint32_t total_size;
279 uint32_t table_size;
280 uint32_t kernel_count;
281 struct cache_entry **hash_table;
282 bool modified;
283
284 VkAllocationCallbacks alloc;
285 };
286
287 struct tu_pipeline_key
288 {
289 };
290
291
292 /* queue types */
293 #define TU_QUEUE_GENERAL 0
294
295 #define TU_MAX_QUEUE_FAMILIES 1
296
297 struct tu_fence
298 {
299 struct wsi_fence *fence_wsi;
300 bool signaled;
301 int fd;
302 };
303
304 void
305 tu_fence_init(struct tu_fence *fence, bool signaled);
306 void
307 tu_fence_finish(struct tu_fence *fence);
308 void
309 tu_fence_update_fd(struct tu_fence *fence, int fd);
310 void
311 tu_fence_copy(struct tu_fence *fence, const struct tu_fence *src);
312 void
313 tu_fence_signal(struct tu_fence *fence);
314 void
315 tu_fence_wait_idle(struct tu_fence *fence);
316
317 struct tu_queue
318 {
319 VK_LOADER_DATA _loader_data;
320 struct tu_device *device;
321 uint32_t queue_family_index;
322 int queue_idx;
323 VkDeviceQueueCreateFlags flags;
324
325 uint32_t msm_queue_id;
326 struct tu_fence submit_fence;
327 };
328
329 struct tu_bo
330 {
331 uint32_t gem_handle;
332 uint64_t size;
333 uint64_t iova;
334 void *map;
335 };
336
337 struct tu_device
338 {
339 VK_LOADER_DATA _loader_data;
340
341 VkAllocationCallbacks alloc;
342
343 struct tu_instance *instance;
344
345 struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES];
346 int queue_count[TU_MAX_QUEUE_FAMILIES];
347
348 struct tu_physical_device *physical_device;
349
350 struct ir3_compiler *compiler;
351
352 /* Backup in-memory cache to be used if the app doesn't provide one */
353 struct tu_pipeline_cache *mem_cache;
354
355 struct tu_bo vsc_draw_strm;
356 struct tu_bo vsc_prim_strm;
357 uint32_t vsc_draw_strm_pitch;
358 uint32_t vsc_prim_strm_pitch;
359
360 #define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
361
362 /* Currently the kernel driver uses a 32-bit GPU address space, but it
363 * should be impossible for GPU addresses to go beyond 48 bits.
364 */
365 struct {
366 struct tu_bo bo;
367 mtx_t construct_mtx;
368 bool initialized;
369 } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
370
371 struct tu_bo border_color;
372
373 struct tu_device_extension_table enabled_extensions;
374 };
375
376 VkResult
377 tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size);
378 VkResult
379 tu_bo_init_dmabuf(struct tu_device *dev,
380 struct tu_bo *bo,
381 uint64_t size,
382 int fd);
383 int
384 tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo);
385 void
386 tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
387 VkResult
388 tu_bo_map(struct tu_device *dev, struct tu_bo *bo);
389
390 /* Get a scratch bo for use inside a command buffer. This will always return
391 * the same bo given the same size or similar sizes, so only one scratch bo
392 * can be used at the same time. It's meant for short-lived things where we
393 * need to write to some piece of memory, read from it, and then immediately
394 * discard it.
395 */
396 VkResult
397 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
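
/* Usage sketch (assumed pattern, not from this file): grab the shared scratch
 * BO, map it, and use it for a short-lived GPU write/CPU readback:
 *
 *    struct tu_bo *scratch;
 *    VkResult result = tu_get_scratch_bo(dev, 64, &scratch);
 *    if (result == VK_SUCCESS)
 *       result = tu_bo_map(dev, scratch);
 *    // emit packets that write to scratch->iova, read back via scratch->map
 */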
398
399 struct tu_cs_entry
400 {
401 /* No ownership */
402 const struct tu_bo *bo;
403
404 uint32_t size;
405 uint32_t offset;
406 };
407
408 struct ts_cs_memory {
409 uint32_t *map;
410 uint64_t iova;
411 };
412
413 struct tu_draw_state {
414 uint64_t iova : 48;
415 uint32_t size : 16;
416 };
417
418 enum tu_dynamic_state
419 {
420 /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
421 TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
422 TU_DYNAMIC_STATE_COUNT,
423 };
424
425 enum tu_draw_state_group_id
426 {
427 TU_DRAW_STATE_PROGRAM,
428 TU_DRAW_STATE_PROGRAM_BINNING,
429 TU_DRAW_STATE_VB,
430 TU_DRAW_STATE_VI,
431 TU_DRAW_STATE_VI_BINNING,
432 TU_DRAW_STATE_RAST,
433 TU_DRAW_STATE_DS,
434 TU_DRAW_STATE_BLEND,
435 TU_DRAW_STATE_VS_CONST,
436 TU_DRAW_STATE_GS_CONST,
437 TU_DRAW_STATE_FS_CONST,
438 TU_DRAW_STATE_DESC_SETS,
439 TU_DRAW_STATE_DESC_SETS_LOAD,
440 TU_DRAW_STATE_VS_PARAMS,
441
442 /* dynamic state related draw states */
443 TU_DRAW_STATE_DYNAMIC,
444 TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
445 };
446
447 enum tu_cs_mode
448 {
449
450 /*
451 * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it
452 * is full. tu_cs_begin must be called before command packet emission and
453 * tu_cs_end must be called after.
454 *
455 * This mode may create multiple entries internally. The entries must be
456 * submitted together.
457 */
458 TU_CS_MODE_GROW,
459
460 /*
461 * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external,
462 * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no
463 * effect on it.
464 *
465 * This mode does not create any entry or any BO.
466 */
467 TU_CS_MODE_EXTERNAL,
468
469 /*
470 * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct
471 * command packet emission. tu_cs_begin_sub_stream must be called to get a
472 * sub-stream to emit command packets to. When done with the sub-stream,
473 * tu_cs_end_sub_stream must be called.
474 *
475 * This mode does not create any entry internally.
476 */
477 TU_CS_MODE_SUB_STREAM,
478 };
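
/* Usage sketch for the GROW and SUB_STREAM modes. The tu_cs_* helpers named
 * here are declared in tu_cs.h, not in this header, so the exact signatures
 * below are assumptions:
 *
 *    // TU_CS_MODE_GROW: bracket emission with begin/end, then submit all of
 *    // cs->entries together.
 *    tu_cs_begin(&cmd->cs);
 *    // ... emit packets ...
 *    tu_cs_end(&cmd->cs);
 *
 *    // TU_CS_MODE_SUB_STREAM: carve out a sub-stream, emit into it, and keep
 *    // the returned tu_cs_entry describing the emitted range.
 *    struct tu_cs sub;
 *    tu_cs_begin_sub_stream(&cmd->sub_cs, max_dwords, &sub);
 *    // ... emit packets into &sub ...
 *    struct tu_cs_entry entry = tu_cs_end_sub_stream(&cmd->sub_cs, &sub);
 */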
479
480 struct tu_cs
481 {
482 uint32_t *start;
483 uint32_t *cur;
484 uint32_t *reserved_end;
485 uint32_t *end;
486
487 struct tu_device *device;
488 enum tu_cs_mode mode;
489 uint32_t next_bo_size;
490
491 struct tu_cs_entry *entries;
492 uint32_t entry_count;
493 uint32_t entry_capacity;
494
495 struct tu_bo **bos;
496 uint32_t bo_count;
497 uint32_t bo_capacity;
498
499 /* state for cond_exec_start/cond_exec_end */
500 uint32_t cond_flags;
501 uint32_t *cond_dwords;
502 };
503
504 struct tu_device_memory
505 {
506 struct tu_bo bo;
507 VkDeviceSize size;
508
509 /* for dedicated allocations */
510 struct tu_image *image;
511 struct tu_buffer *buffer;
512
513 uint32_t type_index;
514 void *map;
515 void *user_ptr;
516 };
517
518 struct tu_descriptor_range
519 {
520 uint64_t va;
521 uint32_t size;
522 };
523
524 struct tu_descriptor_set
525 {
526 const struct tu_descriptor_set_layout *layout;
527 struct tu_descriptor_pool *pool;
528 uint32_t size;
529
530 uint64_t va;
531 uint32_t *mapped_ptr;
532
533 uint32_t *dynamic_descriptors;
534
535 struct tu_bo *buffers[0];
536 };
537
538 struct tu_push_descriptor_set
539 {
540 struct tu_descriptor_set set;
541 uint32_t capacity;
542 };
543
544 struct tu_descriptor_pool_entry
545 {
546 uint32_t offset;
547 uint32_t size;
548 struct tu_descriptor_set *set;
549 };
550
551 struct tu_descriptor_pool
552 {
553 struct tu_bo bo;
554 uint64_t current_offset;
555 uint64_t size;
556
557 uint8_t *host_memory_base;
558 uint8_t *host_memory_ptr;
559 uint8_t *host_memory_end;
560
561 uint32_t entry_count;
562 uint32_t max_entry_count;
563 struct tu_descriptor_pool_entry entries[0];
564 };
565
566 struct tu_descriptor_update_template_entry
567 {
568 VkDescriptorType descriptor_type;
569
570 /* The number of descriptors to update */
571 uint32_t descriptor_count;
572
573 /* Into mapped_ptr or dynamic_descriptors, in units of the respective array
574 */
575 uint32_t dst_offset;
576
577 /* In dwords. Not valid/used for dynamic descriptors */
578 uint32_t dst_stride;
579
580 uint32_t buffer_offset;
581
582 /* Only valid for combined image samplers and samplers */
583 uint16_t has_sampler;
584
585 /* In bytes */
586 size_t src_offset;
587 size_t src_stride;
588
589 /* For push descriptors */
590 const uint32_t *immutable_samplers;
591 };
592
593 struct tu_descriptor_update_template
594 {
595 uint32_t entry_count;
596 struct tu_descriptor_update_template_entry entry[0];
597 };
598
599 struct tu_buffer
600 {
601 VkDeviceSize size;
602
603 VkBufferUsageFlags usage;
604 VkBufferCreateFlags flags;
605
606 struct tu_bo *bo;
607 VkDeviceSize bo_offset;
608 };
609
610 static inline uint64_t
611 tu_buffer_iova(struct tu_buffer *buffer)
612 {
613 return buffer->bo->iova + buffer->bo_offset;
614 }
615
616 struct tu_vertex_binding
617 {
618 struct tu_buffer *buffer;
619 VkDeviceSize offset;
620 };
621
622 const char *
623 tu_get_debug_option_name(int id);
624
625 const char *
626 tu_get_perftest_option_name(int id);
627
628 struct tu_descriptor_state
629 {
630 struct tu_descriptor_set *sets[MAX_SETS];
631 uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
632 };
633
634 struct tu_tile
635 {
636 uint8_t pipe;
637 uint8_t slot;
638 VkOffset2D begin;
639 VkOffset2D end;
640 };
641
642 struct tu_tiling_config
643 {
644 VkRect2D render_area;
645
646 /* position and size of the first tile */
647 VkRect2D tile0;
648 /* number of tiles */
649 VkExtent2D tile_count;
650
651 /* size of the first VSC pipe */
652 VkExtent2D pipe0;
653 /* number of VSC pipes */
654 VkExtent2D pipe_count;
655
656 /* pipe register values */
657 uint32_t pipe_config[MAX_VSC_PIPES];
658 uint32_t pipe_sizes[MAX_VSC_PIPES];
659
660 /* Whether sysmem rendering must be used */
661 bool force_sysmem;
662 };
663
664 enum tu_cmd_dirty_bits
665 {
666 TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
667 TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2,
668 TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3,
669 TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4,
670 TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
671 TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6,
672 /* all draw states were disabled and need to be re-enabled: */
673 TU_CMD_DIRTY_DRAW_STATE = 1 << 7,
674 };
675
676 struct tu_streamout_state {
677 uint16_t stride[IR3_MAX_SO_BUFFERS];
678 uint32_t ncomp[IR3_MAX_SO_BUFFERS];
679 uint32_t prog[IR3_MAX_SO_OUTPUTS * 2];
680 uint32_t prog_count;
681 uint32_t vpc_so_buf_cntl;
682 };
683
684 /* There are only three cache domains we have to care about: the CCU, or
685 * color cache unit, which is used for color and depth/stencil attachments
686 * and copy/blit destinations and is conceptually split into separate color
687 * and depth domains; and the universal cache, or UCHE, which is used for
688 * pretty much everything else, except for the CP (uncached) and the host.
689 * We need to flush whenever data crosses these boundaries.
690 */
691
692 enum tu_cmd_access_mask {
693 TU_ACCESS_UCHE_READ = 1 << 0,
694 TU_ACCESS_UCHE_WRITE = 1 << 1,
695 TU_ACCESS_CCU_COLOR_READ = 1 << 2,
696 TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
697 TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
698 TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,
699
700 /* Experiments have shown that while it's safe to avoid flushing the CCU
701 * after each blit/renderpass, it's not safe to assume that subsequent
702 * lookups with a different attachment state will hit unflushed cache
703 * entries. That is, the CCU needs to be flushed and possibly invalidated
704 * when accessing memory with a different attachment state. Writing to an
705 * attachment under the following conditions after clearing using the
706 * normal 2d engine path is known to have issues:
707 *
708 * - It isn't the 0'th layer.
710 * - There is more than one attachment, and this isn't the 0'th attachment
710 * (this seems to also depend on the cpp of the attachments).
711 *
712 * Our best guess is that the layer/MRT state is used when computing
713 * the location of a cache entry in CCU, to avoid conflicts. We assume that
714 * any access in a renderpass after or before an access by a transfer needs
715 * a flush/invalidate, and use the _INCOHERENT variants to represent access
716 * by a transfer.
717 */
718 TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
719 TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
720 TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
721 TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,
722
723 TU_ACCESS_SYSMEM_READ = 1 << 10,
724 TU_ACCESS_SYSMEM_WRITE = 1 << 11,
725
726 /* Set if a WFI is required due to data being read by the CP or the 2D
727 * engine.
728 */
729 TU_ACCESS_WFI_READ = 1 << 12,
730
731 TU_ACCESS_READ =
732 TU_ACCESS_UCHE_READ |
733 TU_ACCESS_CCU_COLOR_READ |
734 TU_ACCESS_CCU_DEPTH_READ |
735 TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
736 TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
737 TU_ACCESS_SYSMEM_READ,
738
739 TU_ACCESS_WRITE =
740 TU_ACCESS_UCHE_WRITE |
741 TU_ACCESS_CCU_COLOR_WRITE |
742 TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
743 TU_ACCESS_CCU_DEPTH_WRITE |
744 TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
745 TU_ACCESS_SYSMEM_WRITE,
746
747 TU_ACCESS_ALL =
748 TU_ACCESS_READ |
749 TU_ACCESS_WRITE,
750 };
751
752 enum tu_cmd_flush_bits {
753 TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
754 TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
755 TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
756 TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
757 TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
758 TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
759
760 TU_CMD_FLAG_ALL_FLUSH =
761 TU_CMD_FLAG_CCU_FLUSH_DEPTH |
762 TU_CMD_FLAG_CCU_FLUSH_COLOR |
763 TU_CMD_FLAG_CACHE_FLUSH,
764
765 TU_CMD_FLAG_ALL_INVALIDATE =
766 TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
767 TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
768 TU_CMD_FLAG_CACHE_INVALIDATE,
769
770 TU_CMD_FLAG_WFI = 1 << 6,
771 };
772
773 /* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
774 * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
775 * which part of the gmem is used by the CCU. Here we keep track of the
776 * current state of the CCU.
777 */
778 enum tu_cmd_ccu_state {
779 TU_CMD_CCU_SYSMEM,
780 TU_CMD_CCU_GMEM,
781 TU_CMD_CCU_UNKNOWN,
782 };
783
784 struct tu_cache_state {
785 /* Caches which must be made available (flushed) eventually if there are
786 * any users outside that cache domain, and caches which must be
787 * invalidated eventually if there are any reads.
788 */
789 enum tu_cmd_flush_bits pending_flush_bits;
790 /* Pending flushes */
791 enum tu_cmd_flush_bits flush_bits;
792 };
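
/* Illustrative example (hypothetical, not the driver's actual barrier logic):
 * if a transfer wrote an image through the color CCU and a shader will next
 * read it through UCHE, the color CCU must be flushed and UCHE invalidated,
 * roughly:
 *
 *    cache->flush_bits |= TU_CMD_FLAG_CCU_FLUSH_COLOR |
 *                         TU_CMD_FLAG_CACHE_INVALIDATE;
 */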
793
794 struct tu_cmd_state
795 {
796 uint32_t dirty;
797
798 struct tu_pipeline *pipeline;
799 struct tu_pipeline *compute_pipeline;
800
801 /* Vertex buffers */
802 struct
803 {
804 struct tu_buffer *buffers[MAX_VBS];
805 VkDeviceSize offsets[MAX_VBS];
806 } vb;
807
808 /* for dynamic states that can't be emitted directly */
809 uint32_t dynamic_stencil_mask;
810 uint32_t dynamic_stencil_wrmask;
811 uint32_t dynamic_stencil_ref;
812 uint32_t dynamic_gras_su_cntl;
813
814 /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
815 struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
816 struct tu_cs_entry vertex_buffers_ib;
817 struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES];
818 struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
819
820 /* Stream output buffers */
821 struct
822 {
823 struct tu_buffer *buffers[IR3_MAX_SO_BUFFERS];
824 VkDeviceSize offsets[IR3_MAX_SO_BUFFERS];
825 VkDeviceSize sizes[IR3_MAX_SO_BUFFERS];
826 } streamout_buf;
827
828 uint8_t streamout_reset;
829 uint8_t streamout_enabled;
830
831 /* Index buffer */
832 struct tu_buffer *index_buffer;
833 uint64_t index_offset;
834 uint32_t index_type;
835 uint32_t max_index_count;
836 uint64_t index_va;
837
838 /* Renderpasses are tricky, because we may need to flush differently if
839 * using sysmem vs. gmem and therefore we have to delay any flushing that
840 * happens before a renderpass. So we have to have two copies of the flush
841 * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
842 * and one for outside a renderpass.
843 */
844 struct tu_cache_state cache;
845 struct tu_cache_state renderpass_cache;
846
847 enum tu_cmd_ccu_state ccu_state;
848
849 const struct tu_render_pass *pass;
850 const struct tu_subpass *subpass;
851 const struct tu_framebuffer *framebuffer;
852
853 struct tu_tiling_config tiling_config;
854
855 struct tu_cs_entry tile_store_ib;
856 };
857
858 struct tu_cmd_pool
859 {
860 VkAllocationCallbacks alloc;
861 struct list_head cmd_buffers;
862 struct list_head free_cmd_buffers;
863 uint32_t queue_family_index;
864 };
865
866 struct tu_cmd_buffer_upload
867 {
868 uint8_t *map;
869 unsigned offset;
870 uint64_t size;
871 struct list_head list;
872 };
873
874 enum tu_cmd_buffer_status
875 {
876 TU_CMD_BUFFER_STATUS_INVALID,
877 TU_CMD_BUFFER_STATUS_INITIAL,
878 TU_CMD_BUFFER_STATUS_RECORDING,
879 TU_CMD_BUFFER_STATUS_EXECUTABLE,
880 TU_CMD_BUFFER_STATUS_PENDING,
881 };
882
883 struct tu_bo_list
884 {
885 uint32_t count;
886 uint32_t capacity;
887 struct drm_msm_gem_submit_bo *bo_infos;
888 };
889
890 #define TU_BO_LIST_FAILED (~0)
891
892 void
893 tu_bo_list_init(struct tu_bo_list *list);
894 void
895 tu_bo_list_destroy(struct tu_bo_list *list);
896 void
897 tu_bo_list_reset(struct tu_bo_list *list);
898 uint32_t
899 tu_bo_list_add(struct tu_bo_list *list,
900 const struct tu_bo *bo,
901 uint32_t flags);
902 VkResult
903 tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other);
904
905 /* This struct defines the layout of the scratch_bo */
906 struct tu6_control
907 {
908 uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
909 uint32_t _pad0;
910 volatile uint32_t vsc_overflow;
911 uint32_t _pad1;
912 /* flag set from cmdstream when VSC overflow detected: */
913 uint32_t vsc_scratch;
914 uint32_t _pad2;
915 uint32_t _pad3;
916 uint32_t _pad4;
917
918 /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, starting on a 32-byte boundary. */
919 struct {
920 uint32_t offset;
921 uint32_t pad[7];
922 } flush_base[4];
923 };
924
925 #define ctrl_offset(member) offsetof(struct tu6_control, member)
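
/* Usage sketch (hypothetical): ctrl_offset() gives the byte offset of a
 * tu6_control field, typically added to the scratch_bo iova when emitting
 * packets that read or write it:
 *
 *    uint64_t vsc_overflow_iova =
 *       cmd->scratch_bo.iova + ctrl_offset(vsc_overflow);
 */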
926
927 struct tu_cmd_buffer
928 {
929 VK_LOADER_DATA _loader_data;
930
931 struct tu_device *device;
932
933 struct tu_cmd_pool *pool;
934 struct list_head pool_link;
935
936 VkCommandBufferUsageFlags usage_flags;
937 VkCommandBufferLevel level;
938 enum tu_cmd_buffer_status status;
939
940 struct tu_cmd_state state;
941 struct tu_vertex_binding vertex_bindings[MAX_VBS];
942 uint32_t vertex_bindings_set;
943 uint32_t queue_family_index;
944
945 uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
946 VkShaderStageFlags push_constant_stages;
947 struct tu_descriptor_set meta_push_descriptors;
948
949 struct tu_descriptor_state descriptors[MAX_BIND_POINTS];
950
951 struct tu_cmd_buffer_upload upload;
952
953 VkResult record_result;
954
955 struct tu_bo_list bo_list;
956 struct tu_cs cs;
957 struct tu_cs draw_cs;
958 struct tu_cs draw_epilogue_cs;
959 struct tu_cs sub_cs;
960
961 struct tu_bo scratch_bo;
962
963 struct tu_bo vsc_draw_strm;
964 struct tu_bo vsc_prim_strm;
965 uint32_t vsc_draw_strm_pitch;
966 uint32_t vsc_prim_strm_pitch;
967 bool use_vsc_data;
968 };
969
970 /* Temporary struct for tracking a register state to be written, used by
971 * a6xx-pack.h and tu_cs_emit_regs()
972 */
973 struct tu_reg_value {
974 uint32_t reg;
975 uint64_t value;
976 bool is_address;
977 struct tu_bo *bo;
978 bool bo_write;
979 uint32_t bo_offset;
980 uint32_t bo_shift;
981 };
982
983
984 void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
985 struct tu_cs *cs);
986
987 void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
988 struct tu_cs *cs,
989 enum tu_cmd_ccu_state ccu_state);
990
991 void
992 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
993 struct tu_cs *cs,
994 enum vgt_event_type event);
995
996 static inline struct tu_descriptor_state *
997 tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
998 VkPipelineBindPoint bind_point)
999 {
1000 return &cmd_buffer->descriptors[bind_point];
1001 }
1002
1003 struct tu_event
1004 {
1005 struct tu_bo bo;
1006 };
1007
1008 static inline gl_shader_stage
1009 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
1010 {
1011 assert(__builtin_popcount(vk_stage) == 1);
1012 return ffs(vk_stage) - 1;
1013 }
1014
1015 static inline VkShaderStageFlagBits
1016 mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
1017 {
1018 return (1 << mesa_stage);
1019 }
1020
1021 #define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
1022
1023 #define tu_foreach_stage(stage, stage_bits) \
1024 for (gl_shader_stage stage, \
1025 __tmp = (gl_shader_stage)((stage_bits) &TU_STAGE_MASK); \
1026 stage = __builtin_ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
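
/* Usage sketch (hypothetical caller): iterate the stages set in a stage
 * bitmask such as pipeline->active_stages; 'stage' is declared by the macro:
 *
 *    tu_foreach_stage(stage, pipeline->active_stages) {
 *       emit_stage_consts(cmd, stage);   // hypothetical helper
 *    }
 */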
1027
1028 uint32_t
1029 tu6_stage2opcode(gl_shader_stage type);
1030 enum a6xx_state_block
1031 tu6_stage2shadersb(gl_shader_stage type);
1032
1033 struct tu_shader_module
1034 {
1035 unsigned char sha1[20];
1036
1037 uint32_t code_size;
1038 const uint32_t *code[0];
1039 };
1040
1041 struct tu_push_constant_range
1042 {
1043 uint32_t lo;
1044 uint32_t count;
1045 };
1046
1047 struct tu_shader
1048 {
1049 struct ir3_shader *ir3_shader;
1050
1051 struct tu_push_constant_range push_consts;
1052 uint8_t active_desc_sets;
1053 };
1054
1055 struct tu_shader *
1056 tu_shader_create(struct tu_device *dev,
1057 gl_shader_stage stage,
1058 const VkPipelineShaderStageCreateInfo *stage_info,
1059 struct tu_pipeline_layout *layout,
1060 const VkAllocationCallbacks *alloc);
1061
1062 void
1063 tu_shader_destroy(struct tu_device *dev,
1064 struct tu_shader *shader,
1065 const VkAllocationCallbacks *alloc);
1066
1067 struct tu_program_descriptor_linkage
1068 {
1069 struct ir3_ubo_analysis_state ubo_state;
1070 struct ir3_const_state const_state;
1071
1072 uint32_t constlen;
1073
1074 struct tu_push_constant_range push_consts;
1075 };
1076
1077 struct tu_pipeline
1078 {
1079 struct tu_cs cs;
1080
1081 struct tu_pipeline_layout *layout;
1082
1083 bool need_indirect_descriptor_sets;
1084 VkShaderStageFlags active_stages;
1085 uint32_t active_desc_sets;
1086
1087 struct tu_streamout_state streamout;
1088
1089 /* mask of enabled dynamic states
1090 * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
1091 */
1092 uint32_t dynamic_state_mask;
1093 struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
1094
1095 /* gras_su_cntl without line width, used for dynamic line width state */
1096 uint32_t gras_su_cntl;
1097
1098 struct
1099 {
1100 struct tu_bo binary_bo;
1101 struct tu_cs_entry state_ib;
1102 struct tu_cs_entry binning_state_ib;
1103
1104 struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
1105 } program;
1106
1107 struct
1108 {
1109 struct tu_cs_entry state_ib;
1110 } load_state;
1111
1112 struct
1113 {
1114 struct tu_cs_entry state_ib;
1115 struct tu_cs_entry binning_state_ib;
1116 uint32_t bindings_used;
1117 } vi;
1118
1119 struct
1120 {
1121 enum pc_di_primtype primtype;
1122 bool primitive_restart;
1123 } ia;
1124
1125 struct
1126 {
1127 struct tu_cs_entry state_ib;
1128 } rast;
1129
1130 struct
1131 {
1132 struct tu_cs_entry state_ib;
1133 } ds;
1134
1135 struct
1136 {
1137 struct tu_cs_entry state_ib;
1138 } blend;
1139
1140 struct
1141 {
1142 uint32_t local_size[3];
1143 } compute;
1144 };
1145
1146 void
1147 tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport);
1148
1149 void
1150 tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor);
1151
1152 void
1153 tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
1154
1155 void
1156 tu6_emit_depth_bias(struct tu_cs *cs,
1157 float constant_factor,
1158 float clamp,
1159 float slope_factor);
1160
1161 void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);
1162
1163 void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);
1164
1165 void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
1166
1167 void
1168 tu6_emit_xs_config(struct tu_cs *cs,
1169 gl_shader_stage stage,
1170 const struct ir3_shader_variant *xs,
1171 uint64_t binary_iova);
1172
1173 void
1174 tu6_emit_vpc(struct tu_cs *cs,
1175 const struct ir3_shader_variant *vs,
1176 const struct ir3_shader_variant *gs,
1177 const struct ir3_shader_variant *fs,
1178 struct tu_streamout_state *tf);
1179
1180 void
1181 tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);
1182
1183 struct tu_image_view;
1184
1185 void
1186 tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
1187 struct tu_cs *cs,
1188 struct tu_image_view *src,
1189 struct tu_image_view *dst,
1190 uint32_t layers,
1191 const VkRect2D *rect);
1192
1193 void
1194 tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
1195 struct tu_cs *cs,
1196 uint32_t a,
1197 const VkRenderPassBeginInfo *info);
1198
1199 void
1200 tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
1201 struct tu_cs *cs,
1202 uint32_t a,
1203 const VkRenderPassBeginInfo *info);
1204
1205 void
1206 tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
1207 struct tu_cs *cs,
1208 uint32_t a,
1209 bool force_load);
1210
1211 /* expose this function to be able to emit a load without checking LOAD_OP */
1212 void
1213 tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);
1214
1215 /* note: gmem store can also resolve */
1216 void
1217 tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
1218 struct tu_cs *cs,
1219 uint32_t a,
1220 uint32_t gmem_a);
1221
1222 enum tu_supported_formats {
1223 FMT_VERTEX = 1,
1224 FMT_TEXTURE = 2,
1225 FMT_COLOR = 4,
1226 };
1227
1228 struct tu_native_format
1229 {
1230 enum a6xx_format fmt : 8;
1231 enum a3xx_color_swap swap : 8;
1232 enum a6xx_tile_mode tile_mode : 8;
1233 enum tu_supported_formats supported : 8;
1234 };
1235
1236 struct tu_native_format tu6_format_vtx(VkFormat format);
1237 struct tu_native_format tu6_format_color(VkFormat format, enum a6xx_tile_mode tile_mode);
1238 struct tu_native_format tu6_format_texture(VkFormat format, enum a6xx_tile_mode tile_mode);
1239
1240 static inline enum a6xx_format
1241 tu6_base_format(VkFormat format)
1242 {
1243 /* note: tu6_format_color doesn't care about tiling for .fmt field */
1244 return tu6_format_color(format, TILE6_LINEAR).fmt;
1245 }
1246
1247 enum a6xx_depth_format tu6_pipe2depth(VkFormat format);
1248
1249 struct tu_image
1250 {
1251 VkImageType type;
1252 /* The original VkFormat provided by the client. This may not match any
1253 * of the actual surface formats.
1254 */
1255 VkFormat vk_format;
1256 VkImageAspectFlags aspects;
1257 VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
1258 VkImageTiling tiling; /**< VkImageCreateInfo::tiling */
1259 VkImageCreateFlags flags; /**< VkImageCreateInfo::flags */
1260 VkExtent3D extent;
1261 uint32_t level_count;
1262 uint32_t layer_count;
1263 VkSampleCountFlagBits samples;
1264
1265 struct fdl_layout layout;
1266
1267 unsigned queue_family_mask;
1268 bool exclusive;
1269 bool shareable;
1270
1271 /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
1272 VkDeviceMemory owned_memory;
1273
1274 /* Set when bound */
1275 struct tu_bo *bo;
1276 VkDeviceSize bo_offset;
1277 };
1278
1279 static inline uint32_t
1280 tu_get_layerCount(const struct tu_image *image,
1281 const VkImageSubresourceRange *range)
1282 {
1283 return range->layerCount == VK_REMAINING_ARRAY_LAYERS
1284 ? image->layer_count - range->baseArrayLayer
1285 : range->layerCount;
1286 }
1287
1288 static inline uint32_t
1289 tu_get_levelCount(const struct tu_image *image,
1290 const VkImageSubresourceRange *range)
1291 {
1292 return range->levelCount == VK_REMAINING_MIP_LEVELS
1293 ? image->level_count - range->baseMipLevel
1294 : range->levelCount;
1295 }
1296
1297 enum a3xx_msaa_samples
1298 tu_msaa_samples(uint32_t samples);
1299 enum a6xx_tex_fetchsize
1300 tu6_fetchsize(VkFormat format);
1301
1302 struct tu_image_view
1303 {
1304 struct tu_image *image; /**< VkImageViewCreateInfo::image */
1305
1306 uint64_t base_addr;
1307 uint64_t ubwc_addr;
1308 uint32_t layer_size;
1309 uint32_t ubwc_layer_size;
1310
1311 /* used to determine if fast gmem store path can be used */
1312 VkExtent2D extent;
1313 bool need_y2_align;
1314
1315 bool ubwc_enabled;
1316
1317 uint32_t descriptor[A6XX_TEX_CONST_DWORDS];
1318
1319 /* Descriptor for use as a storage image as opposed to a sampled image.
1320 * This has a few differences for cube maps (e.g. type).
1321 */
1322 uint32_t storage_descriptor[A6XX_TEX_CONST_DWORDS];
1323
1324 /* pre-filled register values */
1325 uint32_t PITCH;
1326 uint32_t FLAG_BUFFER_PITCH;
1327
1328 uint32_t RB_MRT_BUF_INFO;
1329 uint32_t SP_FS_MRT_REG;
1330
1331 uint32_t SP_PS_2D_SRC_INFO;
1332 uint32_t SP_PS_2D_SRC_SIZE;
1333
1334 uint32_t RB_2D_DST_INFO;
1335
1336 uint32_t RB_BLIT_DST_INFO;
1337 };
1338
1339 struct tu_sampler_ycbcr_conversion {
1340 VkFormat format;
1341 VkSamplerYcbcrModelConversion ycbcr_model;
1342 VkSamplerYcbcrRange ycbcr_range;
1343 VkComponentMapping components;
1344 VkChromaLocation chroma_offsets[2];
1345 VkFilter chroma_filter;
1346 };
1347
1348 struct tu_sampler {
1349 uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
1350 struct tu_sampler_ycbcr_conversion *ycbcr_sampler;
1351 };
1352
1353 void
1354 tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1355
1356 void
1357 tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src);
1358
1359 void
1360 tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1361
1362 enum a6xx_tex_filter
1363 tu6_tex_filter(VkFilter filter, unsigned aniso);
1364
1365 VkResult
1366 tu_image_create(VkDevice _device,
1367 const VkImageCreateInfo *pCreateInfo,
1368 const VkAllocationCallbacks *alloc,
1369 VkImage *pImage,
1370 uint64_t modifier,
1371 const VkSubresourceLayout *plane_layouts);
1372
1373 VkResult
1374 tu_image_from_gralloc(VkDevice device_h,
1375 const VkImageCreateInfo *base_info,
1376 const VkNativeBufferANDROID *gralloc_info,
1377 const VkAllocationCallbacks *alloc,
1378 VkImage *out_image_h);
1379
1380 void
1381 tu_image_view_init(struct tu_image_view *view,
1382 const VkImageViewCreateInfo *pCreateInfo);
1383
1384 struct tu_buffer_view
1385 {
1386 uint32_t descriptor[A6XX_TEX_CONST_DWORDS];
1387
1388 struct tu_buffer *buffer;
1389 };
1390 void
1391 tu_buffer_view_init(struct tu_buffer_view *view,
1392 struct tu_device *device,
1393 const VkBufferViewCreateInfo *pCreateInfo);
1394
1395 struct tu_attachment_info
1396 {
1397 struct tu_image_view *attachment;
1398 };
1399
1400 struct tu_framebuffer
1401 {
1402 uint32_t width;
1403 uint32_t height;
1404 uint32_t layers;
1405
1406 uint32_t attachment_count;
1407 struct tu_attachment_info attachments[0];
1408 };
1409
1410 struct tu_subpass_barrier {
1411 VkPipelineStageFlags src_stage_mask;
1412 VkAccessFlags src_access_mask;
1413 VkAccessFlags dst_access_mask;
1414 bool incoherent_ccu_color, incoherent_ccu_depth;
1415 };
1416
1417 struct tu_subpass_attachment
1418 {
1419 uint32_t attachment;
1420 VkImageLayout layout;
1421 };
1422
1423 struct tu_subpass
1424 {
1425 uint32_t input_count;
1426 uint32_t color_count;
1427 struct tu_subpass_attachment *input_attachments;
1428 struct tu_subpass_attachment *color_attachments;
1429 struct tu_subpass_attachment *resolve_attachments;
1430 struct tu_subpass_attachment depth_stencil_attachment;
1431
1432 VkSampleCountFlagBits samples;
1433 bool has_external_src, has_external_dst;
1434
1435 uint32_t srgb_cntl;
1436
1437 struct tu_subpass_barrier start_barrier;
1438 };
1439
1440 struct tu_render_pass_attachment
1441 {
1442 VkFormat format;
1443 uint32_t samples;
1444 uint32_t cpp;
1445 VkImageAspectFlags clear_mask;
1446 bool load;
1447 bool store;
1448 VkImageLayout initial_layout, final_layout;
1449 int32_t gmem_offset;
1450 };
1451
1452 struct tu_render_pass
1453 {
1454 uint32_t attachment_count;
1455 uint32_t subpass_count;
1456 uint32_t gmem_pixels;
1457 uint32_t tile_align_w;
1458 struct tu_subpass_attachment *subpass_attachments;
1459 struct tu_render_pass_attachment *attachments;
1460 struct tu_subpass_barrier end_barrier;
1461 struct tu_subpass subpasses[0];
1462 };
1463
1464 struct tu_query_pool
1465 {
1466 VkQueryType type;
1467 uint32_t stride;
1468 uint64_t size;
1469 uint32_t pipeline_statistics;
1470 struct tu_bo bo;
1471 };
1472
1473 struct tu_semaphore
1474 {
1475 uint32_t syncobj;
1476 uint32_t temp_syncobj;
1477 };
1478
1479 void
1480 tu_set_descriptor_set(struct tu_cmd_buffer *cmd_buffer,
1481 VkPipelineBindPoint bind_point,
1482 struct tu_descriptor_set *set,
1483 unsigned idx);
1484
1485 void
1486 tu_update_descriptor_sets(struct tu_device *device,
1487 struct tu_cmd_buffer *cmd_buffer,
1488 VkDescriptorSet overrideSet,
1489 uint32_t descriptorWriteCount,
1490 const VkWriteDescriptorSet *pDescriptorWrites,
1491 uint32_t descriptorCopyCount,
1492 const VkCopyDescriptorSet *pDescriptorCopies);
1493
1494 void
1495 tu_update_descriptor_set_with_template(
1496 struct tu_device *device,
1497 struct tu_cmd_buffer *cmd_buffer,
1498 struct tu_descriptor_set *set,
1499 VkDescriptorUpdateTemplate descriptorUpdateTemplate,
1500 const void *pData);
1501
1502 int
1503 tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id);
1504
1505 int
1506 tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size);
1507
1508 int
1509 tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base);
1510
1511 int
1512 tu_drm_submitqueue_new(const struct tu_device *dev,
1513 int priority,
1514 uint32_t *queue_id);
1515
1516 void
1517 tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id);
1518
1519 uint32_t
1520 tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags);
1521 uint32_t
1522 tu_gem_import_dmabuf(const struct tu_device *dev,
1523 int prime_fd,
1524 uint64_t size);
1525 int
1526 tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle);
1527 void
1528 tu_gem_close(const struct tu_device *dev, uint32_t gem_handle);
1529 uint64_t
1530 tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle);
1531 uint64_t
1532 tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle);
1533
1534 #define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
1535 \
1536 static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
1537 { \
1538 return (struct __tu_type *) _handle; \
1539 } \
1540 \
1541 static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
1542 { \
1543 return (__VkType) _obj; \
1544 }
1545
1546 #define TU_DEFINE_NONDISP_HANDLE_CASTS(__tu_type, __VkType) \
1547 \
1548 static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
1549 { \
1550 return (struct __tu_type *) (uintptr_t) _handle; \
1551 } \
1552 \
1553 static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
1554 { \
1555 return (__VkType)(uintptr_t) _obj; \
1556 }
1557
1558 #define TU_FROM_HANDLE(__tu_type, __name, __handle) \
1559 struct __tu_type *__name = __tu_type##_from_handle(__handle)
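
/* Usage sketch (hypothetical entry point): unwrap Vulkan handles into driver
 * structs at the top of an entry point:
 *
 *    VkResult tu_DoSomething(VkDevice _device, VkBuffer _buffer)
 *    {
 *       TU_FROM_HANDLE(tu_device, device, _device);
 *       TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
 *       ...
 *    }
 */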
1560
1561 TU_DEFINE_HANDLE_CASTS(tu_cmd_buffer, VkCommandBuffer)
1562 TU_DEFINE_HANDLE_CASTS(tu_device, VkDevice)
1563 TU_DEFINE_HANDLE_CASTS(tu_instance, VkInstance)
1564 TU_DEFINE_HANDLE_CASTS(tu_physical_device, VkPhysicalDevice)
1565 TU_DEFINE_HANDLE_CASTS(tu_queue, VkQueue)
1566
1567 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, VkCommandPool)
1568 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, VkBuffer)
1569 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, VkBufferView)
1570 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, VkDescriptorPool)
1571 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, VkDescriptorSet)
1572 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout,
1573 VkDescriptorSetLayout)
1574 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template,
1575 VkDescriptorUpdateTemplate)
1576 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, VkDeviceMemory)
1577 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_fence, VkFence)
1578 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_event, VkEvent)
1579 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, VkFramebuffer)
1580 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image, VkImage)
1581 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, VkImageView)
1582 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, VkPipelineCache)
1583 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, VkPipeline)
1584 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
1585 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
1586 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
1587 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
1588 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)
1589 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_shader_module, VkShaderModule)
1590 TU_DEFINE_NONDISP_HANDLE_CASTS(tu_semaphore, VkSemaphore)
1591
1592 #endif /* TU_PRIVATE_H */