anv: Implement the basic form of VK_EXT_transform_feedback
[mesa.git] src/intel/vulkan/anv_private.h
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef ANV_PRIVATE_H
25 #define ANV_PRIVATE_H
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdbool.h>
30 #include <pthread.h>
31 #include <assert.h>
32 #include <stdint.h>
33 #include <i915_drm.h>
34
35 #ifdef HAVE_VALGRIND
36 #include <valgrind.h>
37 #include <memcheck.h>
38 #define VG(x) x
39 #ifndef NDEBUG
40 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
41 #endif
42 #else
43 #define VG(x)
44 #endif
45
46 #include "common/gen_clflush.h"
47 #include "common/gen_gem.h"
48 #include "dev/gen_device_info.h"
49 #include "blorp/blorp.h"
50 #include "compiler/brw_compiler.h"
51 #include "util/macros.h"
52 #include "util/hash_table.h"
53 #include "util/list.h"
54 #include "util/set.h"
55 #include "util/u_atomic.h"
56 #include "util/u_vector.h"
57 #include "util/u_math.h"
58 #include "util/vma.h"
59 #include "vk_alloc.h"
60 #include "vk_debug_report.h"
61
62 /* Pre-declarations needed for WSI entrypoints */
63 struct wl_surface;
64 struct wl_display;
65 typedef struct xcb_connection_t xcb_connection_t;
66 typedef uint32_t xcb_visualid_t;
67 typedef uint32_t xcb_window_t;
68
69 struct anv_buffer;
70 struct anv_buffer_view;
71 struct anv_image_view;
72 struct anv_instance;
73
74 struct gen_l3_config;
75
76 #include <vulkan/vulkan.h>
77 #include <vulkan/vulkan_intel.h>
78 #include <vulkan/vk_icd.h>
79
80 #include "anv_android.h"
81 #include "anv_entrypoints.h"
82 #include "anv_extensions.h"
83 #include "isl/isl.h"
84
85 #include "common/gen_debug.h"
86 #include "common/intel_log.h"
87 #include "wsi_common.h"
88
89 /* anv Virtual Memory Layout
90 * =========================
91 *
92 * When the anv driver is determining the virtual graphics addresses of memory
93 * objects itself using the softpin mechanism, the following memory ranges
94 * will be used.
95 *
96 * Three special considerations to notice:
97 *
98 * (1) the dynamic state pool is located within the same 4 GiB as the low
99 * heap. This is to work around a VF cache issue described in a comment in
100 * anv_physical_device_init_heaps.
101 *
102 * (2) the binding table pool is located at lower addresses than the surface
103 * state pool, within a 4 GiB range. This allows surface state base addresses
104 * to cover both binding tables (16-bit offsets) and surface states (32-bit
105 * offsets).
106 *
107 * (3) the last 4 GiB of the address space is withheld from the high
108 * heap. Various hardware units will read past the end of an object for
109 * various reasons. This healthy margin prevents reads from wrapping around
110 * 48-bit addresses.
111 */
112 #define LOW_HEAP_MIN_ADDRESS 0x000000001000ULL /* 4 KiB */
113 #define LOW_HEAP_MAX_ADDRESS 0x0000bfffffffULL
114 #define DYNAMIC_STATE_POOL_MIN_ADDRESS 0x0000c0000000ULL /* 3 GiB */
115 #define DYNAMIC_STATE_POOL_MAX_ADDRESS 0x0000ffffffffULL
116 #define BINDING_TABLE_POOL_MIN_ADDRESS 0x000100000000ULL /* 4 GiB */
117 #define BINDING_TABLE_POOL_MAX_ADDRESS 0x00013fffffffULL
118 #define SURFACE_STATE_POOL_MIN_ADDRESS 0x000140000000ULL /* 5 GiB */
119 #define SURFACE_STATE_POOL_MAX_ADDRESS 0x00017fffffffULL
120 #define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
121 #define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
122 #define HIGH_HEAP_MIN_ADDRESS 0x0001c0000000ULL /* 7 GiB */
123 #define HIGH_HEAP_MAX_ADDRESS 0xfffeffffffffULL
124
125 #define LOW_HEAP_SIZE \
126 (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
127 #define HIGH_HEAP_SIZE \
128 (HIGH_HEAP_MAX_ADDRESS - HIGH_HEAP_MIN_ADDRESS + 1)
129 #define DYNAMIC_STATE_POOL_SIZE \
130 (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
131 #define BINDING_TABLE_POOL_SIZE \
132 (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
133 #define SURFACE_STATE_POOL_SIZE \
134 (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
135 #define INSTRUCTION_STATE_POOL_SIZE \
136 (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
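
/* A worked check of consideration (2) above (a sketch of the arithmetic, not
 * driver code): the binding table pool and the surface state pool both live
 * inside the single 4 GiB window starting at BINDING_TABLE_POOL_MIN_ADDRESS,
 * so one Surface State Base Address can reach both:
 *
 *    BINDING_TABLE_POOL_MIN_ADDRESS = 0x000100000000  (4 GiB, 4 GiB aligned)
 *    SURFACE_STATE_POOL_MAX_ADDRESS = 0x00017fffffff  (6 GiB - 1)
 *    SURFACE_STATE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS
 *       = 0x7fffffff < 2^32
 *
 * so 16-bit binding table offsets and 32-bit surface state offsets are both
 * expressible relative to that base.
 */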
137
138 /* Allowing different clear colors requires us to perform a depth resolve at
139 * the end of certain render passes. This is because while slow clears store
140 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
141 * See the PRMs for examples describing when additional resolves would be
142 * necessary. To enable fast clears without requiring extra resolves, we set
143 * the clear value to a globally-defined one. We could allow different values
144 * if the user doesn't expect coherent data during or after a render pass
145 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
146 * don't seem to exist yet. In almost all Vulkan applications tested thus far,
147 * 1.0f seems to be the only value used. The only application that doesn't set
148 * this value does so through the use of a seemingly uninitialized clear
149 * value.
150 */
151 #define ANV_HZ_FC_VAL 1.0f
152
153 #define MAX_VBS 28
154 #define MAX_XFB_BUFFERS 4
155 #define MAX_XFB_STREAMS 4
156 #define MAX_SETS 8
157 #define MAX_RTS 8
158 #define MAX_VIEWPORTS 16
159 #define MAX_SCISSORS 16
160 #define MAX_PUSH_CONSTANTS_SIZE 128
161 #define MAX_DYNAMIC_BUFFERS 16
162 #define MAX_IMAGES 64
163 #define MAX_GEN8_IMAGES 8
164 #define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
165
166 /* The kernel relocation API has a limitation of a 32-bit delta value
167 * applied to the address before it is written which, despite being
168 * unsigned, is treated as signed. Because of the way that this maps to
169 * the Vulkan API, we cannot handle an offset into a buffer that does not
170 * fit into a signed 32 bits. The only mechanism we have for dealing with
171 * this at the moment is to limit all VkDeviceMemory objects to a maximum
172 * of 2GB each. The Vulkan spec allows us to do this:
173 *
174 * "Some platforms may have a limit on the maximum size of a single
175 * allocation. For example, certain systems may fail to create
176 * allocations with a size greater than or equal to 4GB. Such a limit is
177 * implementation-dependent, and if such a failure occurs then the error
178 * VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
179 *
180 * We don't use vk_error here because it's not an error so much as an
181 * indication to the application that the allocation is too large.
182 */
183 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
184
185 #define ANV_SVGS_VB_INDEX MAX_VBS
186 #define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)
187
188 /* We reserve this MI ALU register for the purpose of handling predication.
189 * Other code which uses the MI ALU should leave it alone.
190 */
191 #define ANV_PREDICATE_RESULT_REG MI_ALU_REG15
192
193 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
194
195 static inline uint32_t
196 align_down_npot_u32(uint32_t v, uint32_t a)
197 {
198 return v - (v % a);
199 }
200
201 static inline uint32_t
202 align_u32(uint32_t v, uint32_t a)
203 {
204 assert(a != 0 && a == (a & -a));
205 return (v + a - 1) & ~(a - 1);
206 }
207
208 static inline uint64_t
209 align_u64(uint64_t v, uint64_t a)
210 {
211 assert(a != 0 && a == (a & -a));
212 return (v + a - 1) & ~(a - 1);
213 }
214
215 static inline int32_t
216 align_i32(int32_t v, int32_t a)
217 {
218 assert(a != 0 && a == (a & -a));
219 return (v + a - 1) & ~(a - 1);
220 }
221
222 /** Alignment must be a power of 2. */
223 static inline bool
224 anv_is_aligned(uintmax_t n, uintmax_t a)
225 {
226 assert(a == (a & -a));
227 return (n & (a - 1)) == 0;
228 }
229
230 static inline uint32_t
231 anv_minify(uint32_t n, uint32_t levels)
232 {
233 if (unlikely(n == 0))
234 return 0;
235 else
236 return MAX2(n >> levels, 1);
237 }
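
/* For example, for a 67-texel-wide level 0:
 *    anv_minify(67, 1) == 33, anv_minify(67, 2) == 16, anv_minify(67, 6) == 1,
 * and the result is clamped so it never reaches 0.
 */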
238
239 static inline float
240 anv_clamp_f(float f, float min, float max)
241 {
242 assert(min < max);
243
244 if (f > max)
245 return max;
246 else if (f < min)
247 return min;
248 else
249 return f;
250 }
251
252 static inline bool
253 anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
254 {
255 if (*inout_mask & clear_mask) {
256 *inout_mask &= ~clear_mask;
257 return true;
258 } else {
259 return false;
260 }
261 }
262
263 static inline union isl_color_value
264 vk_to_isl_color(VkClearColorValue color)
265 {
266 return (union isl_color_value) {
267 .u32 = {
268 color.uint32[0],
269 color.uint32[1],
270 color.uint32[2],
271 color.uint32[3],
272 },
273 };
274 }
275
276 #define for_each_bit(b, dword) \
277 for (uint32_t __dword = (dword); \
278 (b) = __builtin_ffs(__dword) - 1, __dword; \
279 __dword &= ~(1 << (b)))
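
/* A usage sketch for for_each_bit: walk each set bit of a mask (the mask and
 * handler names here are hypothetical):
 *
 *    uint32_t b;
 *    for_each_bit(b, dirty_mask) {
 *       handle_dirty_bit(b);
 *    }
 *
 * Note that the macro clears bits from a local copy, so the mask passed in is
 * left untouched.
 */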
280
281 #define typed_memcpy(dest, src, count) ({ \
282 STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
283 memcpy((dest), (src), (count) * sizeof(*(src))); \
284 })
285
286 /* Mapping from anv object to VkDebugReportObjectTypeEXT. New types need
287 * to be added here in order to use the mapping in the debug/error/perf macros.
288 */
289 #define REPORT_OBJECT_TYPE(o) \
290 __builtin_choose_expr ( \
291 __builtin_types_compatible_p (__typeof (o), struct anv_instance*), \
292 VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT, \
293 __builtin_choose_expr ( \
294 __builtin_types_compatible_p (__typeof (o), struct anv_physical_device*), \
295 VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT, \
296 __builtin_choose_expr ( \
297 __builtin_types_compatible_p (__typeof (o), struct anv_device*), \
298 VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, \
299 __builtin_choose_expr ( \
300 __builtin_types_compatible_p (__typeof (o), const struct anv_device*), \
301 VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, \
302 __builtin_choose_expr ( \
303 __builtin_types_compatible_p (__typeof (o), struct anv_queue*), \
304 VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT, \
305 __builtin_choose_expr ( \
306 __builtin_types_compatible_p (__typeof (o), struct anv_semaphore*), \
307 VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT, \
308 __builtin_choose_expr ( \
309 __builtin_types_compatible_p (__typeof (o), struct anv_cmd_buffer*), \
310 VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT, \
311 __builtin_choose_expr ( \
312 __builtin_types_compatible_p (__typeof (o), struct anv_fence*), \
313 VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT, \
314 __builtin_choose_expr ( \
315 __builtin_types_compatible_p (__typeof (o), struct anv_device_memory*), \
316 VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, \
317 __builtin_choose_expr ( \
318 __builtin_types_compatible_p (__typeof (o), struct anv_buffer*), \
319 VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, \
320 __builtin_choose_expr ( \
321 __builtin_types_compatible_p (__typeof (o), struct anv_image*), \
322 VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, \
323 __builtin_choose_expr ( \
324 __builtin_types_compatible_p (__typeof (o), const struct anv_image*), \
325 VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, \
326 __builtin_choose_expr ( \
327 __builtin_types_compatible_p (__typeof (o), struct anv_event*), \
328 VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT, \
329 __builtin_choose_expr ( \
330 __builtin_types_compatible_p (__typeof (o), struct anv_query_pool*), \
331 VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT, \
332 __builtin_choose_expr ( \
333 __builtin_types_compatible_p (__typeof (o), struct anv_buffer_view*), \
334 VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT, \
335 __builtin_choose_expr ( \
336 __builtin_types_compatible_p (__typeof (o), struct anv_image_view*), \
337 VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT, \
338 __builtin_choose_expr ( \
339 __builtin_types_compatible_p (__typeof (o), struct anv_shader_module*), \
340 VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT, \
341 __builtin_choose_expr ( \
342 __builtin_types_compatible_p (__typeof (o), struct anv_pipeline_cache*), \
343 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT, \
344 __builtin_choose_expr ( \
345 __builtin_types_compatible_p (__typeof (o), struct anv_pipeline_layout*), \
346 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT, \
347 __builtin_choose_expr ( \
348 __builtin_types_compatible_p (__typeof (o), struct anv_render_pass*), \
349 VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT, \
350 __builtin_choose_expr ( \
351 __builtin_types_compatible_p (__typeof (o), struct anv_pipeline*), \
352 VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT, \
353 __builtin_choose_expr ( \
354 __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_set_layout*), \
355 VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT, \
356 __builtin_choose_expr ( \
357 __builtin_types_compatible_p (__typeof (o), struct anv_sampler*), \
358 VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT, \
359 __builtin_choose_expr ( \
360 __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_pool*), \
361 VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT, \
362 __builtin_choose_expr ( \
363 __builtin_types_compatible_p (__typeof (o), struct anv_descriptor_set*), \
364 VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT, \
365 __builtin_choose_expr ( \
366 __builtin_types_compatible_p (__typeof (o), struct anv_framebuffer*), \
367 VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT, \
368 __builtin_choose_expr ( \
369 __builtin_types_compatible_p (__typeof (o), struct anv_cmd_pool*), \
370 VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT, \
371 __builtin_choose_expr ( \
372 __builtin_types_compatible_p (__typeof (o), struct anv_surface*), \
373 VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT, \
374 __builtin_choose_expr ( \
375 __builtin_types_compatible_p (__typeof (o), struct wsi_swapchain*), \
376 VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT, \
377 __builtin_choose_expr ( \
378 __builtin_types_compatible_p (__typeof (o), struct vk_debug_callback*), \
379 VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, \
380 __builtin_choose_expr ( \
381 __builtin_types_compatible_p (__typeof (o), void*), \
382 VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, \
383 /* The void expression results in a compile-time error \
384 when assigning the result to something. */ \
385 (void)0)))))))))))))))))))))))))))))))
386
387 /* Whenever we generate an error, pass it through this function. Useful for
388 * debugging, where we can break on it. Only call at error site, not when
389 * propagating errors. Might be useful to plug in a stack trace here.
390 */
391
392 VkResult __vk_errorv(struct anv_instance *instance, const void *object,
393 VkDebugReportObjectTypeEXT type, VkResult error,
394 const char *file, int line, const char *format,
395 va_list args);
396
397 VkResult __vk_errorf(struct anv_instance *instance, const void *object,
398 VkDebugReportObjectTypeEXT type, VkResult error,
399 const char *file, int line, const char *format, ...);
400
401 #ifdef DEBUG
402 #define vk_error(error) __vk_errorf(NULL, NULL,\
403 VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,\
404 error, __FILE__, __LINE__, NULL)
405 #define vk_errorv(instance, obj, error, format, args)\
406 __vk_errorv(instance, obj, REPORT_OBJECT_TYPE(obj), error,\
407 __FILE__, __LINE__, format, args)
408 #define vk_errorf(instance, obj, error, format, ...)\
409 __vk_errorf(instance, obj, REPORT_OBJECT_TYPE(obj), error,\
410 __FILE__, __LINE__, format, ## __VA_ARGS__)
411 #else
412 #define vk_error(error) error
413 #define vk_errorf(instance, obj, error, format, ...) error
414 #endif
415
416 /**
417 * Warn on ignored extension structs.
418 *
419 * The Vulkan spec requires us to ignore unsupported or unknown structs in
420 * a pNext chain. In debug mode, emitting warnings for ignored structs may
421 * help us discover structs that we should not have ignored.
422 *
423 *
424 * From the Vulkan 1.0.38 spec:
425 *
426 * Any component of the implementation (the loader, any enabled layers,
427 * and drivers) must skip over, without processing (other than reading the
428 * sType and pNext members) any chained structures with sType values not
429 * defined by extensions supported by that component.
430 */
431 #define anv_debug_ignored_stype(sType) \
432 intel_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
433
434 void __anv_perf_warn(struct anv_instance *instance, const void *object,
435 VkDebugReportObjectTypeEXT type, const char *file,
436 int line, const char *format, ...)
437 anv_printflike(6, 7);
438 void anv_loge(const char *format, ...) anv_printflike(1, 2);
439 void anv_loge_v(const char *format, va_list va);
440
441 /**
442 * Print a FINISHME message, including its source location.
443 */
444 #define anv_finishme(format, ...) \
445 do { \
446 static bool reported = false; \
447 if (!reported) { \
448 intel_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
449 ##__VA_ARGS__); \
450 reported = true; \
451 } \
452 } while (0)
453
454 /**
455 * Print a perf warning message. Set INTEL_DEBUG=perf to see these.
456 */
457 #define anv_perf_warn(instance, obj, format, ...) \
458 do { \
459 static bool reported = false; \
460 if (!reported && unlikely(INTEL_DEBUG & DEBUG_PERF)) { \
461 __anv_perf_warn(instance, obj, REPORT_OBJECT_TYPE(obj), __FILE__, __LINE__,\
462 format, ##__VA_ARGS__); \
463 reported = true; \
464 } \
465 } while (0)
466
467 /* A non-fatal assert. Useful for debugging. */
468 #ifdef DEBUG
469 #define anv_assert(x) ({ \
470 if (unlikely(!(x))) \
471 intel_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
472 })
473 #else
474 #define anv_assert(x)
475 #endif
476
477 /* A multi-pointer allocator
478 *
479 * When copying data structures from the user (such as a render pass), it's
480 * common to need to allocate data for a bunch of different things. Instead
481 * of doing several allocations and having to handle all of the error checking
482 * that entails, it can be easier to do a single allocation. This struct
483 * helps facilitate that. The intended usage looks like this:
484 *
485 * ANV_MULTIALLOC(ma)
486 * anv_multialloc_add(&ma, &main_ptr, 1);
487 * anv_multialloc_add(&ma, &substruct1, substruct1Count);
488 * anv_multialloc_add(&ma, &substruct2, substruct2Count);
489 *
490 * if (!anv_multialloc_alloc(&ma, pAllocator, VK_ALLOCATION_SCOPE_FOO))
491 * return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
492 */
493 struct anv_multialloc {
494 size_t size;
495 size_t align;
496
497 uint32_t ptr_count;
498 void **ptrs[8];
499 };
500
501 #define ANV_MULTIALLOC_INIT \
502 ((struct anv_multialloc) { 0, })
503
504 #define ANV_MULTIALLOC(_name) \
505 struct anv_multialloc _name = ANV_MULTIALLOC_INIT
506
507 __attribute__((always_inline))
508 static inline void
509 _anv_multialloc_add(struct anv_multialloc *ma,
510 void **ptr, size_t size, size_t align)
511 {
512 size_t offset = align_u64(ma->size, align);
513 ma->size = offset + size;
514 ma->align = MAX2(ma->align, align);
515
516 /* Store the offset in the pointer. */
517 *ptr = (void *)(uintptr_t)offset;
518
519 assert(ma->ptr_count < ARRAY_SIZE(ma->ptrs));
520 ma->ptrs[ma->ptr_count++] = ptr;
521 }
522
523 #define anv_multialloc_add_size(_ma, _ptr, _size) \
524 _anv_multialloc_add((_ma), (void **)(_ptr), (_size), __alignof__(**(_ptr)))
525
526 #define anv_multialloc_add(_ma, _ptr, _count) \
527 anv_multialloc_add_size(_ma, _ptr, (_count) * sizeof(**(_ptr)));
528
529 __attribute__((always_inline))
530 static inline void *
531 anv_multialloc_alloc(struct anv_multialloc *ma,
532 const VkAllocationCallbacks *alloc,
533 VkSystemAllocationScope scope)
534 {
535 void *ptr = vk_alloc(alloc, ma->size, ma->align, scope);
536 if (!ptr)
537 return NULL;
538
539 /* Fill out each of the pointers with their final value.
540 *
541 * for (uint32_t i = 0; i < ma->ptr_count; i++)
542 * *ma->ptrs[i] = ptr + (uintptr_t)*ma->ptrs[i];
543 *
544 * Unfortunately, even though ma->ptr_count is basically guaranteed to be a
545 * constant, GCC is incapable of figuring this out and unrolling the loop
546 * so we have to give it a little help.
547 */
548 STATIC_ASSERT(ARRAY_SIZE(ma->ptrs) == 8);
549 #define _ANV_MULTIALLOC_UPDATE_POINTER(_i) \
550 if ((_i) < ma->ptr_count) \
551 *ma->ptrs[_i] = ptr + (uintptr_t)*ma->ptrs[_i]
552 _ANV_MULTIALLOC_UPDATE_POINTER(0);
553 _ANV_MULTIALLOC_UPDATE_POINTER(1);
554 _ANV_MULTIALLOC_UPDATE_POINTER(2);
555 _ANV_MULTIALLOC_UPDATE_POINTER(3);
556 _ANV_MULTIALLOC_UPDATE_POINTER(4);
557 _ANV_MULTIALLOC_UPDATE_POINTER(5);
558 _ANV_MULTIALLOC_UPDATE_POINTER(6);
559 _ANV_MULTIALLOC_UPDATE_POINTER(7);
560 #undef _ANV_MULTIALLOC_UPDATE_POINTER
561
562 return ptr;
563 }
564
565 __attribute__((always_inline))
566 static inline void *
567 anv_multialloc_alloc2(struct anv_multialloc *ma,
568 const VkAllocationCallbacks *parent_alloc,
569 const VkAllocationCallbacks *alloc,
570 VkSystemAllocationScope scope)
571 {
572 return anv_multialloc_alloc(ma, alloc ? alloc : parent_alloc, scope);
573 }
574
575 /* Extra ANV-defined BO flags which won't be passed to the kernel */
576 #define ANV_BO_EXTERNAL (1ull << 31)
577 #define ANV_BO_FLAG_MASK (1ull << 31)
578
579 struct anv_bo {
580 uint32_t gem_handle;
581
582 /* Index into the current validation list. This is used by the
583 * validation list building algorithm to track which buffers are already
584 * in the validation list so that we can ensure uniqueness.
585 */
586 uint32_t index;
587
588 /* Last known offset. This value is provided by the kernel when we
589 * execbuf and is used as the presumed offset for the next bunch of
590 * relocations.
591 */
592 uint64_t offset;
593
594 uint64_t size;
595 void *map;
596
597 /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
598 uint32_t flags;
599 };
600
601 static inline void
602 anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, uint64_t size)
603 {
604 bo->gem_handle = gem_handle;
605 bo->index = 0;
606 bo->offset = -1;
607 bo->size = size;
608 bo->map = NULL;
609 bo->flags = 0;
610 }
611
612 /* Represents a lock-free linked list of "free" things. This is used by
613 * both the block pool and the state pools. Unfortunately, in order to
614 * solve the ABA problem, we can't use a single uint32_t head.
615 */
616 union anv_free_list {
617 struct {
618 uint32_t offset;
619
620 /* A simple count that is incremented every time the head changes. */
621 uint32_t count;
622 };
623 uint64_t u64;
624 };
625
626 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
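
/* A sketch of why the count is needed (roughly what a pop does; the names and
 * details are illustrative, not the exact implementation): the head is
 * replaced with a single 64-bit compare-and-swap, so if another thread pops
 * and re-pushes the same offset in between, its incremented count makes the
 * stale swap fail instead of corrupting the list (the ABA problem):
 *
 *    union anv_free_list old, new, current = *list;
 *    do {
 *       old = current;
 *       new.offset = read_next_offset(table, old.offset);
 *       new.count = old.count + 1;
 *       current.u64 = __sync_val_compare_and_swap(&list->u64, old.u64, new.u64);
 *    } while (current.u64 != old.u64);
 */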
627
628 struct anv_block_state {
629 union {
630 struct {
631 uint32_t next;
632 uint32_t end;
633 };
634 uint64_t u64;
635 };
636 };
637
638 #define anv_block_pool_foreach_bo(bo, pool) \
639 for (bo = (pool)->bos; bo != &(pool)->bos[(pool)->nbos]; bo++)
640
641 #define ANV_MAX_BLOCK_POOL_BOS 20
642
643 struct anv_block_pool {
644 struct anv_device *device;
645
646 uint64_t bo_flags;
647
648 struct anv_bo bos[ANV_MAX_BLOCK_POOL_BOS];
649 struct anv_bo *bo;
650 uint32_t nbos;
651
652 uint64_t size;
653
654 /* The address where the start of the pool is pinned. The various bos that
655 * are created as the pool grows will have addresses in the range
656 * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
657 */
658 uint64_t start_address;
659
660 /* The offset from the start of the bo to the "center" of the block
661 * pool. Pointers to allocated blocks are given by
662 * bo.map + center_bo_offset + offsets.
663 */
664 uint32_t center_bo_offset;
665
666 int fd;
667
668 /**
669 * Array of mmaps and gem handles owned by the block pool, reclaimed when
670 * the block pool is destroyed.
671 */
672 struct u_vector mmap_cleanups;
673
674 struct anv_block_state state;
675
676 struct anv_block_state back_state;
677 };
678
679 /* Block pools are backed by a fixed-size 1 GiB memfd */
680 #define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
681
682 /* The center of the block pool is also the middle of the memfd. This may
683 * change in the future if we decide differently for some reason.
684 */
685 #define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)
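
/* A sketch of the offset -> pointer mapping described for center_bo_offset
 * above (ignoring the multi-BO growth handled by anv_block_pool_map):
 *
 *    void *ptr = pool->bo->map + pool->center_bo_offset + offset;
 *
 * Ordinary allocations use offset >= 0 and grow upward from the center;
 * "back" allocations use negative offsets and grow downward.
 */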
686
687 static inline uint32_t
688 anv_block_pool_size(struct anv_block_pool *pool)
689 {
690 return pool->state.end + pool->back_state.end;
691 }
692
693 struct anv_state {
694 int32_t offset;
695 uint32_t alloc_size;
696 void *map;
697 uint32_t idx;
698 };
699
700 #define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })
701
702 struct anv_fixed_size_state_pool {
703 union anv_free_list free_list;
704 struct anv_block_state block;
705 };
706
707 #define ANV_MIN_STATE_SIZE_LOG2 6
708 #define ANV_MAX_STATE_SIZE_LOG2 20
709
710 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)
711
712 struct anv_free_entry {
713 uint32_t next;
714 struct anv_state state;
715 };
716
717 struct anv_state_table {
718 struct anv_device *device;
719 int fd;
720 struct anv_free_entry *map;
721 uint32_t size;
722 struct anv_block_state state;
723 struct u_vector mmap_cleanups;
724 };
725
726 struct anv_state_pool {
727 struct anv_block_pool block_pool;
728
729 struct anv_state_table table;
730
731 /* The size of blocks which will be allocated from the block pool */
732 uint32_t block_size;
733
734 /** Free list for "back" allocations */
735 union anv_free_list back_alloc_free_list;
736
737 struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
738 };
739
740 struct anv_state_stream_block;
741
742 struct anv_state_stream {
743 struct anv_state_pool *state_pool;
744
745 /* The size of blocks to allocate from the state pool */
746 uint32_t block_size;
747
748 /* Current block we're allocating from */
749 struct anv_state block;
750
751 /* Offset into the current block at which to allocate the next state */
752 uint32_t next;
753
754 /* List of all blocks allocated from this pool */
755 struct anv_state_stream_block *block_list;
756 };
757
758 /* The block_pool functions are exported for testing only. The block pool
759 * should only be used via a state pool (see below).
760 */
761 VkResult anv_block_pool_init(struct anv_block_pool *pool,
762 struct anv_device *device,
763 uint64_t start_address,
764 uint32_t initial_size,
765 uint64_t bo_flags);
766 void anv_block_pool_finish(struct anv_block_pool *pool);
767 int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
768 uint32_t block_size, uint32_t *padding);
769 int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
770 uint32_t block_size);
771 void* anv_block_pool_map(struct anv_block_pool *pool, int32_t offset);
772
773 VkResult anv_state_pool_init(struct anv_state_pool *pool,
774 struct anv_device *device,
775 uint64_t start_address,
776 uint32_t block_size,
777 uint64_t bo_flags);
778 void anv_state_pool_finish(struct anv_state_pool *pool);
779 struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
780 uint32_t state_size, uint32_t alignment);
781 struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
782 void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
783 void anv_state_stream_init(struct anv_state_stream *stream,
784 struct anv_state_pool *state_pool,
785 uint32_t block_size);
786 void anv_state_stream_finish(struct anv_state_stream *stream);
787 struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
788 uint32_t size, uint32_t alignment);
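
/* A minimal usage sketch for the state stream API (error handling omitted;
 * the block size and the choice of dynamic_state_pool are just examples):
 *
 *    struct anv_state_stream stream;
 *    anv_state_stream_init(&stream, &device->dynamic_state_pool, 4096);
 *    struct anv_state state = anv_state_stream_alloc(&stream, 64, 64);
 *    ... fill state.map, emit state.offset into the command stream ...
 *    anv_state_stream_finish(&stream);
 */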
789
790 VkResult anv_state_table_init(struct anv_state_table *table,
791 struct anv_device *device,
792 uint32_t initial_entries);
793 void anv_state_table_finish(struct anv_state_table *table);
794 VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
795 uint32_t count);
796 void anv_free_list_push(union anv_free_list *list,
797 struct anv_state_table *table,
798 uint32_t idx, uint32_t count);
799 struct anv_state* anv_free_list_pop(union anv_free_list *list,
800 struct anv_state_table *table);
801
802
803 static inline struct anv_state *
804 anv_state_table_get(struct anv_state_table *table, uint32_t idx)
805 {
806 return &table->map[idx].state;
807 }
808 /**
809 * Implements a pool of re-usable BOs. The interface is identical to that
810 * of block_pool except that each block is its own BO.
811 */
812 struct anv_bo_pool {
813 struct anv_device *device;
814
815 uint64_t bo_flags;
816
817 void *free_list[16];
818 };
819
820 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
821 uint64_t bo_flags);
822 void anv_bo_pool_finish(struct anv_bo_pool *pool);
823 VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo,
824 uint32_t size);
825 void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);
826
827 struct anv_scratch_bo {
828 bool exists;
829 struct anv_bo bo;
830 };
831
832 struct anv_scratch_pool {
833 /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
834 struct anv_scratch_bo bos[16][MESA_SHADER_STAGES];
835 };
836
837 void anv_scratch_pool_init(struct anv_device *device,
838 struct anv_scratch_pool *pool);
839 void anv_scratch_pool_finish(struct anv_device *device,
840 struct anv_scratch_pool *pool);
841 struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
842 struct anv_scratch_pool *pool,
843 gl_shader_stage stage,
844 unsigned per_thread_scratch);
845
846 /** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
847 struct anv_bo_cache {
848 struct hash_table *bo_map;
849 pthread_mutex_t mutex;
850 };
851
852 VkResult anv_bo_cache_init(struct anv_bo_cache *cache);
853 void anv_bo_cache_finish(struct anv_bo_cache *cache);
854 VkResult anv_bo_cache_alloc(struct anv_device *device,
855 struct anv_bo_cache *cache,
856 uint64_t size, uint64_t bo_flags,
857 struct anv_bo **bo);
858 VkResult anv_bo_cache_import(struct anv_device *device,
859 struct anv_bo_cache *cache,
860 int fd, uint64_t bo_flags,
861 struct anv_bo **bo);
862 VkResult anv_bo_cache_export(struct anv_device *device,
863 struct anv_bo_cache *cache,
864 struct anv_bo *bo_in, int *fd_out);
865 void anv_bo_cache_release(struct anv_device *device,
866 struct anv_bo_cache *cache,
867 struct anv_bo *bo);
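
/* A usage sketch for the BO cache (assumes 'device' is a live anv_device and
 * ignores error handling):
 *
 *    struct anv_bo *bo;
 *    anv_bo_cache_alloc(device, &device->bo_cache, size, 0, &bo);
 *    ... use bo->gem_handle, or map it with anv_gem_mmap ...
 *    anv_bo_cache_release(device, &device->bo_cache, bo);
 */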
868
869 struct anv_memory_type {
870 /* Standard bits passed on to the client */
871 VkMemoryPropertyFlags propertyFlags;
872 uint32_t heapIndex;
873
874 /* Driver-internal book-keeping */
875 VkBufferUsageFlags valid_buffer_usage;
876 };
877
878 struct anv_memory_heap {
879 /* Standard bits passed on to the client */
880 VkDeviceSize size;
881 VkMemoryHeapFlags flags;
882
883 /* Driver-internal book-keeping */
884 bool supports_48bit_addresses;
885 };
886
887 struct anv_physical_device {
888 VK_LOADER_DATA _loader_data;
889
890 struct anv_instance * instance;
891 uint32_t chipset_id;
892 bool no_hw;
893 char path[20];
894 const char * name;
895 struct {
896 uint16_t domain;
897 uint8_t bus;
898 uint8_t device;
899 uint8_t function;
900 } pci_info;
901 struct gen_device_info info;
902 /** Amount of "GPU memory" we want to advertise
903 *
904 * Clearly, this value is bogus since Intel is a UMA architecture. On
905 * gen7 platforms, we are limited by GTT size unless we want to implement
906 * fine-grained tracking and GTT splitting. On Broadwell and above we are
907 * practically unlimited. However, we will never report more than 3/4 of
908 * the total system RAM to try to avoid running out of RAM.
909 */
910 bool supports_48bit_addresses;
911 struct brw_compiler * compiler;
912 struct isl_device isl_dev;
913 int cmd_parser_version;
914 bool has_exec_async;
915 bool has_exec_capture;
916 bool has_exec_fence;
917 bool has_syncobj;
918 bool has_syncobj_wait;
919 bool has_context_priority;
920 bool use_softpin;
921 bool has_context_isolation;
922
923 struct anv_device_extension_table supported_extensions;
924
925 uint32_t eu_total;
926 uint32_t subslice_total;
927
928 struct {
929 uint32_t type_count;
930 struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
931 uint32_t heap_count;
932 struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
933 } memory;
934
935 uint8_t driver_build_sha1[20];
936 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
937 uint8_t driver_uuid[VK_UUID_SIZE];
938 uint8_t device_uuid[VK_UUID_SIZE];
939
940 struct disk_cache * disk_cache;
941
942 struct wsi_device wsi_device;
943 int local_fd;
944 int master_fd;
945 };
946
947 struct anv_app_info {
948 const char* app_name;
949 uint32_t app_version;
950 const char* engine_name;
951 uint32_t engine_version;
952 uint32_t api_version;
953 };
954
955 struct anv_instance {
956 VK_LOADER_DATA _loader_data;
957
958 VkAllocationCallbacks alloc;
959
960 struct anv_app_info app_info;
961
962 struct anv_instance_extension_table enabled_extensions;
963 struct anv_instance_dispatch_table dispatch;
964 struct anv_device_dispatch_table device_dispatch;
965
966 int physicalDeviceCount;
967 struct anv_physical_device physicalDevice;
968
969 bool pipeline_cache_enabled;
970
971 struct vk_debug_report_instance debug_report_callbacks;
972 };
973
974 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
975 void anv_finish_wsi(struct anv_physical_device *physical_device);
976
977 uint32_t anv_physical_device_api_version(struct anv_physical_device *dev);
978 bool anv_physical_device_extension_supported(struct anv_physical_device *dev,
979 const char *name);
980
981 struct anv_queue {
982 VK_LOADER_DATA _loader_data;
983
984 struct anv_device * device;
985
986 VkDeviceQueueCreateFlags flags;
987 };
988
989 struct anv_pipeline_cache {
990 struct anv_device * device;
991 pthread_mutex_t mutex;
992
993 struct hash_table * nir_cache;
994
995 struct hash_table * cache;
996 };
997
998 struct nir_xfb_info;
999 struct anv_pipeline_bind_map;
1000
1001 void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
1002 struct anv_device *device,
1003 bool cache_enabled);
1004 void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);
1005
1006 struct anv_shader_bin *
1007 anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
1008 const void *key, uint32_t key_size);
1009 struct anv_shader_bin *
1010 anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
1011 const void *key_data, uint32_t key_size,
1012 const void *kernel_data, uint32_t kernel_size,
1013 const void *constant_data,
1014 uint32_t constant_data_size,
1015 const struct brw_stage_prog_data *prog_data,
1016 uint32_t prog_data_size,
1017 const struct nir_xfb_info *xfb_info,
1018 const struct anv_pipeline_bind_map *bind_map);
1019
1020 struct anv_shader_bin *
1021 anv_device_search_for_kernel(struct anv_device *device,
1022 struct anv_pipeline_cache *cache,
1023 const void *key_data, uint32_t key_size);
1024
1025 struct anv_shader_bin *
1026 anv_device_upload_kernel(struct anv_device *device,
1027 struct anv_pipeline_cache *cache,
1028 const void *key_data, uint32_t key_size,
1029 const void *kernel_data, uint32_t kernel_size,
1030 const void *constant_data,
1031 uint32_t constant_data_size,
1032 const struct brw_stage_prog_data *prog_data,
1033 uint32_t prog_data_size,
1034 const struct nir_xfb_info *xfb_info,
1035 const struct anv_pipeline_bind_map *bind_map);
1036
1037 struct nir_shader;
1038 struct nir_shader_compiler_options;
1039
1040 struct nir_shader *
1041 anv_device_search_for_nir(struct anv_device *device,
1042 struct anv_pipeline_cache *cache,
1043 const struct nir_shader_compiler_options *nir_options,
1044 unsigned char sha1_key[20],
1045 void *mem_ctx);
1046
1047 void
1048 anv_device_upload_nir(struct anv_device *device,
1049 struct anv_pipeline_cache *cache,
1050 const struct nir_shader *nir,
1051 unsigned char sha1_key[20]);
1052
1053 struct anv_device {
1054 VK_LOADER_DATA _loader_data;
1055
1056 VkAllocationCallbacks alloc;
1057
1058 struct anv_instance * instance;
1059 uint32_t chipset_id;
1060 bool no_hw;
1061 struct gen_device_info info;
1062 struct isl_device isl_dev;
1063 int context_id;
1064 int fd;
1065 bool can_chain_batches;
1066 bool robust_buffer_access;
1067 struct anv_device_extension_table enabled_extensions;
1068 struct anv_device_dispatch_table dispatch;
1069
1070 pthread_mutex_t vma_mutex;
1071 struct util_vma_heap vma_lo;
1072 struct util_vma_heap vma_hi;
1073 uint64_t vma_lo_available;
1074 uint64_t vma_hi_available;
1075
1076 struct anv_bo_pool batch_bo_pool;
1077
1078 struct anv_bo_cache bo_cache;
1079
1080 struct anv_state_pool dynamic_state_pool;
1081 struct anv_state_pool instruction_state_pool;
1082 struct anv_state_pool binding_table_pool;
1083 struct anv_state_pool surface_state_pool;
1084
1085 struct anv_bo workaround_bo;
1086 struct anv_bo trivial_batch_bo;
1087 struct anv_bo hiz_clear_bo;
1088
1089 struct anv_pipeline_cache default_pipeline_cache;
1090 struct blorp_context blorp;
1091
1092 struct anv_state border_colors;
1093
1094 struct anv_queue queue;
1095
1096 struct anv_scratch_pool scratch_pool;
1097
1098 uint32_t default_mocs;
1099 uint32_t external_mocs;
1100
1101 pthread_mutex_t mutex;
1102 pthread_cond_t queue_submit;
1103 bool _lost;
1104 };
1105
1106 static inline struct anv_state_pool *
1107 anv_binding_table_pool(struct anv_device *device)
1108 {
1109 if (device->instance->physicalDevice.use_softpin)
1110 return &device->binding_table_pool;
1111 else
1112 return &device->surface_state_pool;
1113 }
1114
1115 static inline struct anv_state
1116 anv_binding_table_pool_alloc(struct anv_device *device) {
1117 if (device->instance->physicalDevice.use_softpin)
1118 return anv_state_pool_alloc(&device->binding_table_pool,
1119 device->binding_table_pool.block_size, 0);
1120 else
1121 return anv_state_pool_alloc_back(&device->surface_state_pool);
1122 }
1123
1124 static inline void
1125 anv_binding_table_pool_free(struct anv_device *device, struct anv_state state) {
1126 anv_state_pool_free(anv_binding_table_pool(device), state);
1127 }
1128
1129 static inline uint32_t
1130 anv_mocs_for_bo(const struct anv_device *device, const struct anv_bo *bo)
1131 {
1132 if (bo->flags & ANV_BO_EXTERNAL)
1133 return device->external_mocs;
1134 else
1135 return device->default_mocs;
1136 }
1137
1138 void anv_device_init_blorp(struct anv_device *device);
1139 void anv_device_finish_blorp(struct anv_device *device);
1140
1141 VkResult _anv_device_set_lost(struct anv_device *device,
1142 const char *file, int line,
1143 const char *msg, ...);
1144 #define anv_device_set_lost(dev, ...) \
1145 _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
1146
1147 static inline bool
1148 anv_device_is_lost(struct anv_device *device)
1149 {
1150 return unlikely(device->_lost);
1151 }
1152
1153 VkResult anv_device_execbuf(struct anv_device *device,
1154 struct drm_i915_gem_execbuffer2 *execbuf,
1155 struct anv_bo **execbuf_bos);
1156 VkResult anv_device_query_status(struct anv_device *device);
1157 VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
1158 VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
1159 int64_t timeout);
1160
1161 void* anv_gem_mmap(struct anv_device *device,
1162 uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
1163 void anv_gem_munmap(void *p, uint64_t size);
1164 uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
1165 void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
1166 uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
1167 int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
1168 int anv_gem_wait(struct anv_device *device, uint32_t gem_handle, int64_t *timeout_ns);
1169 int anv_gem_execbuffer(struct anv_device *device,
1170 struct drm_i915_gem_execbuffer2 *execbuf);
1171 int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
1172 uint32_t stride, uint32_t tiling);
1173 int anv_gem_create_context(struct anv_device *device);
1174 bool anv_gem_has_context_priority(int fd);
1175 int anv_gem_destroy_context(struct anv_device *device, int context);
1176 int anv_gem_set_context_param(int fd, int context, uint32_t param,
1177 uint64_t value);
1178 int anv_gem_get_context_param(int fd, int context, uint32_t param,
1179 uint64_t *value);
1180 int anv_gem_get_param(int fd, uint32_t param);
1181 int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
1182 bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
1183 int anv_gem_get_aperture(int fd, uint64_t *size);
1184 int anv_gem_gpu_get_reset_stats(struct anv_device *device,
1185 uint32_t *active, uint32_t *pending);
1186 int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
1187 int anv_gem_reg_read(struct anv_device *device,
1188 uint32_t offset, uint64_t *result);
1189 uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
1190 int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle, uint32_t caching);
1191 int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
1192 uint32_t read_domains, uint32_t write_domain);
1193 int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);
1194 uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
1195 void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
1196 int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
1197 uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
1198 int anv_gem_syncobj_export_sync_file(struct anv_device *device,
1199 uint32_t handle);
1200 int anv_gem_syncobj_import_sync_file(struct anv_device *device,
1201 uint32_t handle, int fd);
1202 void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
1203 bool anv_gem_supports_syncobj_wait(int fd);
1204 int anv_gem_syncobj_wait(struct anv_device *device,
1205 uint32_t *handles, uint32_t num_handles,
1206 int64_t abs_timeout_ns, bool wait_all);
1207
1208 bool anv_vma_alloc(struct anv_device *device, struct anv_bo *bo);
1209 void anv_vma_free(struct anv_device *device, struct anv_bo *bo);
1210
1211 VkResult anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size);
1212
1213 struct anv_reloc_list {
1214 uint32_t num_relocs;
1215 uint32_t array_length;
1216 struct drm_i915_gem_relocation_entry * relocs;
1217 struct anv_bo ** reloc_bos;
1218 struct set * deps;
1219 };
1220
1221 VkResult anv_reloc_list_init(struct anv_reloc_list *list,
1222 const VkAllocationCallbacks *alloc);
1223 void anv_reloc_list_finish(struct anv_reloc_list *list,
1224 const VkAllocationCallbacks *alloc);
1225
1226 VkResult anv_reloc_list_add(struct anv_reloc_list *list,
1227 const VkAllocationCallbacks *alloc,
1228 uint32_t offset, struct anv_bo *target_bo,
1229 uint32_t delta);
1230
1231 struct anv_batch_bo {
1232 /* Link in the anv_cmd_buffer.owned_batch_bos list */
1233 struct list_head link;
1234
1235 struct anv_bo bo;
1236
1237 /* Bytes actually consumed in this batch BO */
1238 uint32_t length;
1239
1240 struct anv_reloc_list relocs;
1241 };
1242
1243 struct anv_batch {
1244 const VkAllocationCallbacks * alloc;
1245
1246 void * start;
1247 void * end;
1248 void * next;
1249
1250 struct anv_reloc_list * relocs;
1251
1252 /* This callback is called (with the associated user data) in the event
1253 * that the batch runs out of space.
1254 */
1255 VkResult (*extend_cb)(struct anv_batch *, void *);
1256 void * user_data;
1257
1258 /**
1259 * Current error status of the command buffer. Used to track inconsistent
1260 * or incomplete command buffer states that are the consequence of run-time
1261 * errors such as out-of-memory scenarios. We want to track this in the
1262 * batch because the command buffer object is not visible to some parts
1263 * of the driver.
1264 */
1265 VkResult status;
1266 };
1267
1268 void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
1269 void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
1270 uint64_t anv_batch_emit_reloc(struct anv_batch *batch,
1271 void *location, struct anv_bo *bo, uint32_t offset);
1272 VkResult anv_device_submit_simple_batch(struct anv_device *device,
1273 struct anv_batch *batch);
1274
1275 static inline VkResult
1276 anv_batch_set_error(struct anv_batch *batch, VkResult error)
1277 {
1278 assert(error != VK_SUCCESS);
1279 if (batch->status == VK_SUCCESS)
1280 batch->status = error;
1281 return batch->status;
1282 }
1283
1284 static inline bool
1285 anv_batch_has_error(struct anv_batch *batch)
1286 {
1287 return batch->status != VK_SUCCESS;
1288 }
1289
1290 struct anv_address {
1291 struct anv_bo *bo;
1292 uint32_t offset;
1293 };
1294
1295 #define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })
1296
1297 static inline bool
1298 anv_address_is_null(struct anv_address addr)
1299 {
1300 return addr.bo == NULL && addr.offset == 0;
1301 }
1302
1303 static inline uint64_t
1304 anv_address_physical(struct anv_address addr)
1305 {
1306 if (addr.bo && (addr.bo->flags & EXEC_OBJECT_PINNED))
1307 return gen_canonical_address(addr.bo->offset + addr.offset);
1308 else
1309 return gen_canonical_address(addr.offset);
1310 }
1311
1312 static inline struct anv_address
1313 anv_address_add(struct anv_address addr, uint64_t offset)
1314 {
1315 addr.offset += offset;
1316 return addr;
1317 }
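
/* A small usage sketch: offset into a bound buffer and resolve the GPU
 * address (the physical address is only meaningful when the BO is pinned,
 * i.e. when softpin is in use):
 *
 *    struct anv_address addr = anv_address_add(buffer->address, 256);
 *    uint64_t gpu_addr = anv_address_physical(addr);
 */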
1318
1319 static inline void
1320 write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
1321 {
1322 unsigned reloc_size = 0;
1323 if (device->info.gen >= 8) {
1324 reloc_size = sizeof(uint64_t);
1325 *(uint64_t *)p = gen_canonical_address(v);
1326 } else {
1327 reloc_size = sizeof(uint32_t);
1328 *(uint32_t *)p = v;
1329 }
1330
1331 if (flush && !device->info.has_llc)
1332 gen_flush_range(p, reloc_size);
1333 }
1334
1335 static inline uint64_t
1336 _anv_combine_address(struct anv_batch *batch, void *location,
1337 const struct anv_address address, uint32_t delta)
1338 {
1339 if (address.bo == NULL) {
1340 return address.offset + delta;
1341 } else {
1342 assert(batch->start <= location && location < batch->end);
1343
1344 return anv_batch_emit_reloc(batch, location, address.bo, address.offset + delta);
1345 }
1346 }
1347
1348 #define __gen_address_type struct anv_address
1349 #define __gen_user_data struct anv_batch
1350 #define __gen_combine_address _anv_combine_address
1351
1352 /* Wrapper macros needed to work around preprocessor argument issues. In
1353 * particular, arguments don't get pre-evaluated if they are concatenated.
1354 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
1355 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
1356 * We can work around this easily enough with these helpers.
1357 */
1358 #define __anv_cmd_length(cmd) cmd ## _length
1359 #define __anv_cmd_length_bias(cmd) cmd ## _length_bias
1360 #define __anv_cmd_header(cmd) cmd ## _header
1361 #define __anv_cmd_pack(cmd) cmd ## _pack
1362 #define __anv_reg_num(reg) reg ## _num
1363
1364 #define anv_pack_struct(dst, struc, ...) do { \
1365 struct struc __template = { \
1366 __VA_ARGS__ \
1367 }; \
1368 __anv_cmd_pack(struc)(NULL, dst, &__template); \
1369 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
1370 } while (0)
1371
1372 #define anv_batch_emitn(batch, n, cmd, ...) ({ \
1373 void *__dst = anv_batch_emit_dwords(batch, n); \
1374 if (__dst) { \
1375 struct cmd __template = { \
1376 __anv_cmd_header(cmd), \
1377 .DWordLength = n - __anv_cmd_length_bias(cmd), \
1378 __VA_ARGS__ \
1379 }; \
1380 __anv_cmd_pack(cmd)(batch, __dst, &__template); \
1381 } \
1382 __dst; \
1383 })
1384
1385 #define anv_batch_emit_merge(batch, dwords0, dwords1) \
1386 do { \
1387 uint32_t *dw; \
1388 \
1389 STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
1390 dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \
1391 if (!dw) \
1392 break; \
1393 for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \
1394 dw[i] = (dwords0)[i] | (dwords1)[i]; \
1395 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
1396 } while (0)
1397
1398 #define anv_batch_emit(batch, cmd, name) \
1399 for (struct cmd name = { __anv_cmd_header(cmd) }, \
1400 *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd)); \
1401 __builtin_expect(_dst != NULL, 1); \
1402 ({ __anv_cmd_pack(cmd)(batch, _dst, &name); \
1403 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
1404 _dst = NULL; \
1405 }))
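
/* Typical use of anv_batch_emit in the genX code, e.g. emitting a
 * PIPE_CONTROL that flushes the depth cache (a sketch; the field names come
 * from the generated genxml packing headers):
 *
 *    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
 *       pc.DepthCacheFlushEnable = true;
 *    }
 *
 * The packing function runs once the for-loop body completes, so fields set
 * inside the braces end up in the batch.
 */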
1406
1407 /* MEMORY_OBJECT_CONTROL_STATE:
1408 * .GraphicsDataTypeGFDT = 0,
1409 * .LLCCacheabilityControlLLCCC = 0,
1410 * .L3CacheabilityControlL3CC = 1,
1411 */
1412 #define GEN7_MOCS 1
1413
1414 /* MEMORY_OBJECT_CONTROL_STATE:
1415 * .LLCeLLCCacheabilityControlLLCCC = 0,
1416 * .L3CacheabilityControlL3CC = 1,
1417 */
1418 #define GEN75_MOCS 1
1419
1420 /* MEMORY_OBJECT_CONTROL_STATE:
1421 * .MemoryTypeLLCeLLCCacheabilityControl = WB,
1422 * .TargetCache = L3DefertoPATforLLCeLLCselection,
1423 * .AgeforQUADLRU = 0
1424 */
1425 #define GEN8_MOCS 0x78
1426
1427 /* MEMORY_OBJECT_CONTROL_STATE:
1428 * .MemoryTypeLLCeLLCCacheabilityControl = UCwithFenceifcoherentcycle,
1429 * .TargetCache = L3DefertoPATforLLCeLLCselection,
1430 * .AgeforQUADLRU = 0
1431 */
1432 #define GEN8_EXTERNAL_MOCS 0x18
1433
1434 /* Skylake: MOCS is now an index into an array of 62 different caching
1435 * configurations programmed by the kernel.
1436 */
1437
1438 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
1439 #define GEN9_MOCS 2
1440
1441 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
1442 #define GEN9_EXTERNAL_MOCS 1
1443
1444 /* Cannonlake MOCS defines are duplicates of Skylake MOCS defines. */
1445 #define GEN10_MOCS GEN9_MOCS
1446 #define GEN10_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
1447
1448 /* Ice Lake MOCS defines are duplicates of Skylake MOCS defines. */
1449 #define GEN11_MOCS GEN9_MOCS
1450 #define GEN11_EXTERNAL_MOCS GEN9_EXTERNAL_MOCS
1451
1452 struct anv_device_memory {
1453 struct anv_bo * bo;
1454 struct anv_memory_type * type;
1455 VkDeviceSize map_size;
1456 void * map;
1457
1458 /* If set, we are holding a reference to an AHardwareBuffer
1459 * which we must release when memory is freed.
1460 */
1461 struct AHardwareBuffer * ahw;
1462 };
1463
1464 /**
1465 * Header for Vertex URB Entry (VUE)
1466 */
1467 struct anv_vue_header {
1468 uint32_t Reserved;
1469 uint32_t RTAIndex; /* RenderTargetArrayIndex */
1470 uint32_t ViewportIndex;
1471 float PointWidth;
1472 };
1473
1474 struct anv_descriptor_set_binding_layout {
1475 #ifndef NDEBUG
1476 /* The type of the descriptors in this binding */
1477 VkDescriptorType type;
1478 #endif
1479
1480 /* Number of array elements in this binding */
1481 uint16_t array_size;
1482
1483 /* Index into the flattened descriptor set */
1484 uint16_t descriptor_index;
1485
1486 /* Index into the dynamic state array for a dynamic buffer */
1487 int16_t dynamic_offset_index;
1488
1489 /* Index into the descriptor set buffer views */
1490 int16_t buffer_index;
1491
1492 struct {
1493 /* Index into the binding table for the associated surface */
1494 int16_t surface_index;
1495
1496 /* Index into the sampler table for the associated sampler */
1497 int16_t sampler_index;
1498
1499 /* Index into the image table for the associated image */
1500 int16_t image_index;
1501 } stage[MESA_SHADER_STAGES];
1502
1503 /* Immutable samplers (or NULL if no immutable samplers) */
1504 struct anv_sampler **immutable_samplers;
1505 };
1506
1507 struct anv_descriptor_set_layout {
1508 /* Descriptor set layouts can be destroyed at almost any time */
1509 uint32_t ref_cnt;
1510
1511 /* Number of bindings in this descriptor set */
1512 uint16_t binding_count;
1513
1514 /* Total size of the descriptor set with room for all array entries */
1515 uint16_t size;
1516
1517 /* Shader stages affected by this descriptor set */
1518 uint16_t shader_stages;
1519
1520 /* Number of buffers in this descriptor set */
1521 uint16_t buffer_count;
1522
1523 /* Number of dynamic offsets used by this descriptor set */
1524 uint16_t dynamic_offset_count;
1525
1526 /* Bindings in this descriptor set */
1527 struct anv_descriptor_set_binding_layout binding[0];
1528 };
1529
1530 static inline void
1531 anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
1532 {
1533 assert(layout && layout->ref_cnt >= 1);
1534 p_atomic_inc(&layout->ref_cnt);
1535 }
1536
1537 static inline void
1538 anv_descriptor_set_layout_unref(struct anv_device *device,
1539 struct anv_descriptor_set_layout *layout)
1540 {
1541 assert(layout && layout->ref_cnt >= 1);
1542 if (p_atomic_dec_zero(&layout->ref_cnt))
1543 vk_free(&device->alloc, layout);
1544 }
1545
1546 struct anv_descriptor {
1547 VkDescriptorType type;
1548
1549 union {
1550 struct {
1551 VkImageLayout layout;
1552 struct anv_image_view *image_view;
1553 struct anv_sampler *sampler;
1554 };
1555
1556 struct {
1557 struct anv_buffer *buffer;
1558 uint64_t offset;
1559 uint64_t range;
1560 };
1561
1562 struct anv_buffer_view *buffer_view;
1563 };
1564 };
1565
1566 struct anv_descriptor_set {
1567 struct anv_descriptor_set_layout *layout;
1568 uint32_t size;
1569 uint32_t buffer_count;
1570 struct anv_buffer_view *buffer_views;
1571 struct anv_descriptor descriptors[0];
1572 };
1573
1574 struct anv_buffer_view {
1575 enum isl_format format; /**< VkBufferViewCreateInfo::format */
1576 uint64_t range; /**< VkBufferViewCreateInfo::range */
1577
1578 struct anv_address address;
1579
1580 struct anv_state surface_state;
1581 struct anv_state storage_surface_state;
1582 struct anv_state writeonly_storage_surface_state;
1583
1584 struct brw_image_param storage_image_param;
1585 };
1586
1587 struct anv_push_descriptor_set {
1588 struct anv_descriptor_set set;
1589
1590 /* Put this field right behind anv_descriptor_set so it fills up the
1591 * descriptors[0] field. */
1592 struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
1593 struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
1594 };
1595
1596 struct anv_descriptor_pool {
1597 uint32_t size;
1598 uint32_t next;
1599 uint32_t free_list;
1600
1601 struct anv_state_stream surface_state_stream;
1602 void *surface_state_free_list;
1603
1604 char data[0];
1605 };
1606
1607 enum anv_descriptor_template_entry_type {
1608 ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
1609 ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
1610 ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
1611 };
1612
1613 struct anv_descriptor_template_entry {
1614 /* The type of descriptor in this entry */
1615 VkDescriptorType type;
1616
1617 /* Binding in the descriptor set */
1618 uint32_t binding;
1619
1620 /* Offset at which to write into the descriptor set binding */
1621 uint32_t array_element;
1622
1623 /* Number of elements to write into the descriptor set binding */
1624 uint32_t array_count;
1625
1626 /* Offset into the user provided data */
1627 size_t offset;
1628
1629 /* Stride between elements into the user provided data */
1630 size_t stride;
1631 };
1632
1633 struct anv_descriptor_update_template {
1634 VkPipelineBindPoint bind_point;
1635
1636 /* The descriptor set this template corresponds to. This value is only
1637 * valid if the template was created with the templateType
1638 * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
1639 */
1640 uint8_t set;
1641
1642 /* Number of entries in this template */
1643 uint32_t entry_count;
1644
1645 /* Entries of the template */
1646 struct anv_descriptor_template_entry entries[0];
1647 };
1648
1649 size_t
1650 anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout);
1651
1652 void
1653 anv_descriptor_set_write_image_view(struct anv_descriptor_set *set,
1654 const struct gen_device_info * const devinfo,
1655 const VkDescriptorImageInfo * const info,
1656 VkDescriptorType type,
1657 uint32_t binding,
1658 uint32_t element);
1659
1660 void
1661 anv_descriptor_set_write_buffer_view(struct anv_descriptor_set *set,
1662 VkDescriptorType type,
1663 struct anv_buffer_view *buffer_view,
1664 uint32_t binding,
1665 uint32_t element);
1666
1667 void
1668 anv_descriptor_set_write_buffer(struct anv_descriptor_set *set,
1669 struct anv_device *device,
1670 struct anv_state_stream *alloc_stream,
1671 VkDescriptorType type,
1672 struct anv_buffer *buffer,
1673 uint32_t binding,
1674 uint32_t element,
1675 VkDeviceSize offset,
1676 VkDeviceSize range);
1677
1678 void
1679 anv_descriptor_set_write_template(struct anv_descriptor_set *set,
1680 struct anv_device *device,
1681 struct anv_state_stream *alloc_stream,
1682 const struct anv_descriptor_update_template *template,
1683 const void *data);
1684
1685 VkResult
1686 anv_descriptor_set_create(struct anv_device *device,
1687 struct anv_descriptor_pool *pool,
1688 struct anv_descriptor_set_layout *layout,
1689 struct anv_descriptor_set **out_set);
1690
1691 void
1692 anv_descriptor_set_destroy(struct anv_device *device,
1693 struct anv_descriptor_pool *pool,
1694 struct anv_descriptor_set *set);
1695
1696 #define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
1697 #define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
1698
1699 struct anv_pipeline_binding {
1700 /* The descriptor set this surface corresponds to. The special value of
1701 * ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS indicates that the offset refers
1702 * to a color attachment and not a regular descriptor.
1703 */
1704 uint8_t set;
1705
1706 /* Binding in the descriptor set */
1707 uint32_t binding;
1708
1709 /* Index in the binding */
1710 uint32_t index;
1711
1712 /* Plane in the binding index */
1713 uint8_t plane;
1714
1715 /* Input attachment index (relative to the subpass) */
1716 uint8_t input_attachment_index;
1717
1718 /* For a storage image, whether it is write-only */
1719 bool write_only;
1720 };
1721
1722 struct anv_pipeline_layout {
1723 struct {
1724 struct anv_descriptor_set_layout *layout;
1725 uint32_t dynamic_offset_start;
1726 } set[MAX_SETS];
1727
1728 uint32_t num_sets;
1729
1730 struct {
1731 bool has_dynamic_offsets;
1732 } stage[MESA_SHADER_STAGES];
1733
1734 unsigned char sha1[20];
1735 };
1736
1737 struct anv_buffer {
1738 struct anv_device * device;
1739 VkDeviceSize size;
1740
1741 VkBufferUsageFlags usage;
1742
1743 /* Set when bound */
1744 struct anv_address address;
1745 };
1746
1747 static inline uint64_t
1748 anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
1749 {
1750 assert(offset <= buffer->size);
1751 if (range == VK_WHOLE_SIZE) {
1752 return buffer->size - offset;
1753 } else {
1754 assert(range + offset >= range);
1755 assert(range + offset <= buffer->size);
1756 return range;
1757 }
1758 }
1759
1760 enum anv_cmd_dirty_bits {
1761 ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
1762 ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
1763 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
1764 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
1765 ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
1766 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
1767 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
1768 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
1769 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
1770 ANV_CMD_DIRTY_DYNAMIC_ALL = (1 << 9) - 1,
1771 ANV_CMD_DIRTY_PIPELINE = 1 << 9,
1772 ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
1773 ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
1774 ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
1775 };
1776 typedef uint32_t anv_cmd_dirty_mask_t;
1777
1778 enum anv_pipe_bits {
1779 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
1780 ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
1781 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
1782 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
1783 ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
1784 ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
1785 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
1786 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
1787 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
1788 ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),
1789 ANV_PIPE_CS_STALL_BIT = (1 << 20),
1790
1791 /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
1792 * a flush has happened but not a CS stall. The next time we do any sort
1793 * of invalidation we need to insert a CS stall at that time. Otherwise,
1794 * we would have to CS stall on every flush which could be bad.
1795 */
1796 ANV_PIPE_NEEDS_CS_STALL_BIT = (1 << 21),
1797
1798 /* This bit does not exist directly in PIPE_CONTROL. It means that render
1799 * target operations related to transfer commands with VkBuffer as
1800 * destination are ongoing. Some operations like copies on the command
1801 * streamer might need to be aware of this to trigger the appropriate stall
1802 * before they can proceed with the copy.
1803 */
1804 ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 22),
1805 };
1806
1807 #define ANV_PIPE_FLUSH_BITS ( \
1808 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
1809 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
1810 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT)
1811
1812 #define ANV_PIPE_STALL_BITS ( \
1813 ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
1814 ANV_PIPE_DEPTH_STALL_BIT | \
1815 ANV_PIPE_CS_STALL_BIT)
1816
1817 #define ANV_PIPE_INVALIDATE_BITS ( \
1818 ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
1819 ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
1820 ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
1821 ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
1822 ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
1823 ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT)
1824
1825 static inline enum anv_pipe_bits
1826 anv_pipe_flush_bits_for_access_flags(VkAccessFlags flags)
1827 {
1828 enum anv_pipe_bits pipe_bits = 0;
1829
1830 unsigned b;
1831 for_each_bit(b, flags) {
1832 switch ((VkAccessFlagBits)(1 << b)) {
1833 case VK_ACCESS_SHADER_WRITE_BIT:
1834 /* We're transitioning a buffer that was previously used as write
1835 * destination through the data port. To make its content available
1836 * to future operations, flush the data cache.
1837 */
1838 pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
1839 break;
1840 case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
1841 /* We're transitioning a buffer that was previously used as render
1842 * target. To make its content available to future operations, flush
1843 * the render target cache.
1844 */
1845 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
1846 break;
1847 case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
1848 /* We're transitioning a buffer that was previously used as depth
1849 * buffer. To make its content available to future operations, flush
1850 * the depth cache.
1851 */
1852 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
1853 break;
1854 case VK_ACCESS_TRANSFER_WRITE_BIT:
1855 /* We're transitioning a buffer that was previously used as a
1856 * transfer write destination. Generic write operations include color
1857         * & depth operations as well as buffer operations like:
1858 * - vkCmdClearColorImage()
1859 * - vkCmdClearDepthStencilImage()
1860 * - vkCmdBlitImage()
1861 * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
1862 *
1863 * Most of these operations are implemented using Blorp which writes
1864 * through the render target, so flush that cache to make it visible
1865 * to future operations. And for depth related operations we also
1866 * need to flush the depth cache.
1867 */
1868 pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
1869 pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
1870 break;
1871 case VK_ACCESS_MEMORY_WRITE_BIT:
1872 /* We're transitioning a buffer for generic write operations. Flush
1873 * all the caches.
1874 */
1875 pipe_bits |= ANV_PIPE_FLUSH_BITS;
1876 break;
1877 default:
1878 break; /* Nothing to do */
1879 }
1880 }
1881
1882 return pipe_bits;
1883 }
1884
1885 static inline enum anv_pipe_bits
1886 anv_pipe_invalidate_bits_for_access_flags(VkAccessFlags flags)
1887 {
1888 enum anv_pipe_bits pipe_bits = 0;
1889
1890 unsigned b;
1891 for_each_bit(b, flags) {
1892 switch ((VkAccessFlagBits)(1 << b)) {
1893 case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
1894 /* Indirect draw commands take a buffer as input that we're going to
1895 * read from the command streamer to load some of the HW registers
1896 * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
1897 * command streamer stall so that all the cache flushes have
1898 * completed before the command streamer loads from memory.
1899 */
1900 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
1901        /* Indirect draw commands also set gl_BaseVertex & gl_BaseInstance
1902 * through a vertex buffer, so invalidate that cache.
1903 */
1904 pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
1905        /* For vkCmdDispatchIndirect, we also load gl_NumWorkGroups through a
1906         * UBO from the buffer, so we need to invalidate the constant cache.
1907 */
1908 pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
1909 break;
1910 case VK_ACCESS_INDEX_READ_BIT:
1911 case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
1912        /* We're transitioning a buffer to be used as input for vkCmdDraw*
1913 * commands, so we invalidate the VF cache to make sure there is no
1914 * stale data when we start rendering.
1915 */
1916 pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
1917 break;
1918 case VK_ACCESS_UNIFORM_READ_BIT:
1919        /* We're transitioning a buffer to be used as uniform data. Because
1920 * uniform is accessed through the data port & sampler, we need to
1921 * invalidate the texture cache (sampler) & constant cache (data
1922 * port) to avoid stale data.
1923 */
1924 pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
1925 pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
1926 break;
1927 case VK_ACCESS_SHADER_READ_BIT:
1928 case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
1929 case VK_ACCESS_TRANSFER_READ_BIT:
1930 /* Transitioning a buffer to be read through the sampler, so
1931         * invalidate the texture cache; we don't want any stale data.
1932 */
1933 pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
1934 break;
1935 case VK_ACCESS_MEMORY_READ_BIT:
1936 /* Transitioning a buffer for generic read, invalidate all the
1937 * caches.
1938 */
1939 pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
1940 break;
1941 case VK_ACCESS_MEMORY_WRITE_BIT:
1942 /* Generic write, make sure all previously written things land in
1943 * memory.
1944 */
1945 pipe_bits |= ANV_PIPE_FLUSH_BITS;
1946 break;
1947 case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
1948 /* Transitioning a buffer for conditional rendering. We'll load the
1949 * content of this buffer into HW registers using the command
1950 * streamer, so we need to stall the command streamer to make sure
1951 * any in-flight flush operations have completed.
1952 */
1953 pipe_bits |= ANV_PIPE_CS_STALL_BIT;
1954 break;
1955 default:
1956 break; /* Nothing to do */
1957 }
1958 }
1959
1960 return pipe_bits;
1961 }
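
/* Illustrative sketch (assumed usage, not a declaration from this file):
 * when recording a pipeline barrier, the source access mask selects the
 * caches to flush and the destination access mask selects the caches to
 * invalidate, roughly:
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       anv_pipe_flush_bits_for_access_flags(src_access_mask) |
 *       anv_pipe_invalidate_bits_for_access_flags(dst_access_mask);
 */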
1962
1963 #define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
1964 VK_IMAGE_ASPECT_COLOR_BIT | \
1965 VK_IMAGE_ASPECT_PLANE_0_BIT | \
1966 VK_IMAGE_ASPECT_PLANE_1_BIT | \
1967 VK_IMAGE_ASPECT_PLANE_2_BIT)
1968 #define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
1969 VK_IMAGE_ASPECT_PLANE_0_BIT | \
1970 VK_IMAGE_ASPECT_PLANE_1_BIT | \
1971 VK_IMAGE_ASPECT_PLANE_2_BIT)
1972
1973 struct anv_vertex_binding {
1974 struct anv_buffer * buffer;
1975 VkDeviceSize offset;
1976 };
1977
1978 struct anv_xfb_binding {
1979 struct anv_buffer * buffer;
1980 VkDeviceSize offset;
1981 VkDeviceSize size;
1982 };
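
/* Illustrative sketch (assumed shape of the bind entrypoint, not the actual
 * implementation): vkCmdBindTransformFeedbackBuffersEXT would fill one of
 * these per bound buffer, clamping the size with anv_buffer_get_range():
 *
 *    cmd_buffer->state.xfb_bindings[firstBinding + i] =
 *       (struct anv_xfb_binding) {
 *          .buffer = buffer,
 *          .offset = pOffsets[i],
 *          .size   = anv_buffer_get_range(buffer, pOffsets[i],
 *                                         pSizes ? pSizes[i] : VK_WHOLE_SIZE),
 *       };
 */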
1983
1984 #define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
1985 #define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
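
/* For example, ANV_PARAM_PUSH(8) evaluates to 0x10008 and
 * ANV_PARAM_PUSH_OFFSET(0x10008) recovers the byte offset 8; the 1 << 16 bit
 * marks the value as a push-constant offset.
 */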
1986
1987 struct anv_push_constants {
1988 /* Current allocated size of this push constants data structure.
1989 * Because a decent chunk of it may not be used (images on SKL, for
1990 * instance), we won't actually allocate the entire structure up-front.
1991 */
1992 uint32_t size;
1993
1994 /* Push constant data provided by the client through vkPushConstants */
1995 uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
1996
1997 /* Used for vkCmdDispatchBase */
1998 uint32_t base_work_group_id[3];
1999
2000 /* Image data for image_load_store on pre-SKL */
2001 struct brw_image_param images[MAX_GEN8_IMAGES];
2002 };
2003
2004 struct anv_dynamic_state {
2005 struct {
2006 uint32_t count;
2007 VkViewport viewports[MAX_VIEWPORTS];
2008 } viewport;
2009
2010 struct {
2011 uint32_t count;
2012 VkRect2D scissors[MAX_SCISSORS];
2013 } scissor;
2014
2015 float line_width;
2016
2017 struct {
2018 float bias;
2019 float clamp;
2020 float slope;
2021 } depth_bias;
2022
2023 float blend_constants[4];
2024
2025 struct {
2026 float min;
2027 float max;
2028 } depth_bounds;
2029
2030 struct {
2031 uint32_t front;
2032 uint32_t back;
2033 } stencil_compare_mask;
2034
2035 struct {
2036 uint32_t front;
2037 uint32_t back;
2038 } stencil_write_mask;
2039
2040 struct {
2041 uint32_t front;
2042 uint32_t back;
2043 } stencil_reference;
2044 };
2045
2046 extern const struct anv_dynamic_state default_dynamic_state;
2047
2048 void anv_dynamic_state_copy(struct anv_dynamic_state *dest,
2049 const struct anv_dynamic_state *src,
2050 uint32_t copy_mask);
2051
2052 struct anv_surface_state {
2053 struct anv_state state;
2054 /** Address of the surface referred to by this state
2055 *
2056 * This address is relative to the start of the BO.
2057 */
2058 struct anv_address address;
2059 /* Address of the aux surface, if any
2060 *
2061 * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
2062 *
2063 * With the exception of gen8, the bottom 12 bits of this address' offset
2064 * include extra aux information.
2065 */
2066 struct anv_address aux_address;
2067 /* Address of the clear color, if any
2068 *
2069 * This address is relative to the start of the BO.
2070 */
2071 struct anv_address clear_address;
2072 };
2073
2074 /**
2075 * Attachment state when recording a renderpass instance.
2076 *
2077 * The clear value is valid only if there exists a pending clear.
2078 */
2079 struct anv_attachment_state {
2080 enum isl_aux_usage aux_usage;
2081 enum isl_aux_usage input_aux_usage;
2082 struct anv_surface_state color;
2083 struct anv_surface_state input;
2084
2085 VkImageLayout current_layout;
2086 VkImageAspectFlags pending_clear_aspects;
2087 VkImageAspectFlags pending_load_aspects;
2088 bool fast_clear;
2089 VkClearValue clear_value;
2090 bool clear_color_is_zero_one;
2091 bool clear_color_is_zero;
2092
2093 /* When multiview is active, attachments with a renderpass clear
2094 * operation have their respective layers cleared on the first
2095 * subpass that uses them, and only in that subpass. We keep track
2096 * of this using a bitfield to indicate which layers of an attachment
2097 * have not been cleared yet when multiview is active.
2098 */
2099 uint32_t pending_clear_views;
2100 };
2101
2102 /** State tracking for a particular pipeline bind point
2103 *
2104 * This struct is the base struct for anv_cmd_graphics_state and
2105 * anv_cmd_compute_state. These are used to track state which is bound to a
2106 * particular type of pipeline. Generic state that applies per-stage such as
2107 * binding table offsets and push constants is tracked generically with a
2108 * per-stage array in anv_cmd_state.
2109 */
2110 struct anv_cmd_pipeline_state {
2111 struct anv_pipeline *pipeline;
2112 struct anv_pipeline_layout *layout;
2113
2114 struct anv_descriptor_set *descriptors[MAX_SETS];
2115 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
2116
2117 struct anv_push_descriptor_set *push_descriptors[MAX_SETS];
2118 };
2119
2120 /** State tracking for graphics pipeline
2121 *
2122 * This has anv_cmd_pipeline_state as a base struct to track things which get
2123 * bound to a graphics pipeline. Along with general pipeline bind point state
2124 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2125 * state which is graphics-specific.
2126 */
2127 struct anv_cmd_graphics_state {
2128 struct anv_cmd_pipeline_state base;
2129
2130 anv_cmd_dirty_mask_t dirty;
2131 uint32_t vb_dirty;
2132
2133 struct anv_dynamic_state dynamic;
2134
2135 struct {
2136 struct anv_buffer *index_buffer;
2137 uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
2138 uint32_t index_offset;
2139 } gen7;
2140 };
2141
2142 /** State tracking for compute pipeline
2143 *
2144 * This has anv_cmd_pipeline_state as a base struct to track things which get
2145 * bound to a compute pipeline. Along with general pipeline bind point state
2146 * which is in the anv_cmd_pipeline_state base struct, it also contains other
2147 * state which is compute-specific.
2148 */
2149 struct anv_cmd_compute_state {
2150 struct anv_cmd_pipeline_state base;
2151
2152 bool pipeline_dirty;
2153
2154 struct anv_address num_workgroups;
2155 };
2156
2157 /** State required while building cmd buffer */
2158 struct anv_cmd_state {
2159 /* PIPELINE_SELECT.PipelineSelection */
2160 uint32_t current_pipeline;
2161 const struct gen_l3_config * current_l3_config;
2162
2163 struct anv_cmd_graphics_state gfx;
2164 struct anv_cmd_compute_state compute;
2165
2166 enum anv_pipe_bits pending_pipe_bits;
2167 VkShaderStageFlags descriptors_dirty;
2168 VkShaderStageFlags push_constants_dirty;
2169
2170 struct anv_framebuffer * framebuffer;
2171 struct anv_render_pass * pass;
2172 struct anv_subpass * subpass;
2173 VkRect2D render_area;
2174 uint32_t restart_index;
2175 struct anv_vertex_binding vertex_bindings[MAX_VBS];
2176 bool xfb_enabled;
2177 struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
2178 VkShaderStageFlags push_constant_stages;
2179 struct anv_push_constants * push_constants[MESA_SHADER_STAGES];
2180 struct anv_state binding_tables[MESA_SHADER_STAGES];
2181 struct anv_state samplers[MESA_SHADER_STAGES];
2182
2183 /**
2184    * Whether or not the gen8 PMA fix is enabled. We ensure that it is
2185    * disabled at the top of any command buffer by disabling it in
2186    * EndCommandBuffer and before invoking a secondary in ExecuteCommands.
2187 */
2188 bool pma_fix_enabled;
2189
2190 /**
2191 * Whether or not we know for certain that HiZ is enabled for the current
2192 * subpass. If, for whatever reason, we are unsure as to whether HiZ is
2193 * enabled or not, this will be false.
2194 */
2195 bool hiz_enabled;
2196
2197 bool conditional_render_enabled;
2198
2199 /**
2200 * Array length is anv_cmd_state::pass::attachment_count. Array content is
2201 * valid only when recording a render pass instance.
2202 */
2203 struct anv_attachment_state * attachments;
2204
2205 /**
2206 * Surface states for color render targets. These are stored in a single
2207 * flat array. For depth-stencil attachments, the surface state is simply
2208 * left blank.
2209 */
2210 struct anv_state render_pass_states;
2211
2212 /**
2213 * A null surface state of the right size to match the framebuffer. This
2214 * is one of the states in render_pass_states.
2215 */
2216 struct anv_state null_surface_state;
2217 };
2218
2219 struct anv_cmd_pool {
2220 VkAllocationCallbacks alloc;
2221 struct list_head cmd_buffers;
2222 };
2223
2224 #define ANV_CMD_BUFFER_BATCH_SIZE 8192
2225
2226 enum anv_cmd_buffer_exec_mode {
2227 ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
2228 ANV_CMD_BUFFER_EXEC_MODE_EMIT,
2229 ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
2230 ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
2231 ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
2232 };
2233
2234 struct anv_cmd_buffer {
2235 VK_LOADER_DATA _loader_data;
2236
2237 struct anv_device * device;
2238
2239 struct anv_cmd_pool * pool;
2240 struct list_head pool_link;
2241
2242 struct anv_batch batch;
2243
2244 /* Fields required for the actual chain of anv_batch_bo's.
2245 *
2246 * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
2247 */
2248 struct list_head batch_bos;
2249 enum anv_cmd_buffer_exec_mode exec_mode;
2250
2251 /* A vector of anv_batch_bo pointers for every batch or surface buffer
2252 * referenced by this command buffer
2253 *
2254 * initialized by anv_cmd_buffer_init_batch_bo_chain()
2255 */
2256 struct u_vector seen_bbos;
2257
2258 /* A vector of int32_t's for every block of binding tables.
2259 *
2260 * initialized by anv_cmd_buffer_init_batch_bo_chain()
2261 */
2262 struct u_vector bt_block_states;
2263 uint32_t bt_next;
2264
2265 struct anv_reloc_list surface_relocs;
2266 /** Last seen surface state block pool center bo offset */
2267 uint32_t last_ss_pool_center;
2268
2269 /* Serial for tracking buffer completion */
2270 uint32_t serial;
2271
2272 /* Stream objects for storing temporary data */
2273 struct anv_state_stream surface_state_stream;
2274 struct anv_state_stream dynamic_state_stream;
2275
2276 VkCommandBufferUsageFlags usage_flags;
2277 VkCommandBufferLevel level;
2278
2279 struct anv_cmd_state state;
2280 };
2281
2282 VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2283 void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2284 void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
2285 void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
2286 void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
2287 struct anv_cmd_buffer *secondary);
2288 void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
2289 VkResult anv_cmd_buffer_execbuf(struct anv_device *device,
2290 struct anv_cmd_buffer *cmd_buffer,
2291 const VkSemaphore *in_semaphores,
2292 uint32_t num_in_semaphores,
2293 const VkSemaphore *out_semaphores,
2294 uint32_t num_out_semaphores,
2295 VkFence fence);
2296
2297 VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);
2298
2299 VkResult
2300 anv_cmd_buffer_ensure_push_constants_size(struct anv_cmd_buffer *cmd_buffer,
2301 gl_shader_stage stage, uint32_t size);
2302 #define anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, field) \
2303 anv_cmd_buffer_ensure_push_constants_size(cmd_buffer, stage, \
2304 (offsetof(struct anv_push_constants, field) + \
2305 sizeof(cmd_buffer->state.push_constants[0]->field)))
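
/* Illustrative usage (a sketch, not a call site from this header): before
 * writing base_work_group_id for vkCmdDispatchBase, a command buffer would
 * make sure its compute push constants are large enough to hold the field:
 *
 *    anv_cmd_buffer_ensure_push_constant_field(cmd_buffer,
 *                                              MESA_SHADER_COMPUTE,
 *                                              base_work_group_id);
 */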
2306
2307 struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
2308 const void *data, uint32_t size, uint32_t alignment);
2309 struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
2310 uint32_t *a, uint32_t *b,
2311 uint32_t dwords, uint32_t alignment);
2312
2313 struct anv_address
2314 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
2315 struct anv_state
2316 anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
2317 uint32_t entries, uint32_t *state_offset);
2318 struct anv_state
2319 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
2320 struct anv_state
2321 anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
2322 uint32_t size, uint32_t alignment);
2323
2324 VkResult
2325 anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);
2326
2327 void gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
2328 void gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
2329 bool depth_clamp_enable);
2330 void gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);
2331
2332 void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
2333 struct anv_render_pass *pass,
2334 struct anv_framebuffer *framebuffer,
2335 const VkClearValue *clear_values);
2336
2337 void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);
2338
2339 struct anv_state
2340 anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
2341 gl_shader_stage stage);
2342 struct anv_state
2343 anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);
2344
2345 const struct anv_image_view *
2346 anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
2347
2348 VkResult
2349 anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
2350 uint32_t num_entries,
2351 uint32_t *state_offset,
2352 struct anv_state *bt_state);
2353
2354 void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
2355
2356 void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);
2357
2358 enum anv_fence_type {
2359 ANV_FENCE_TYPE_NONE = 0,
2360 ANV_FENCE_TYPE_BO,
2361 ANV_FENCE_TYPE_SYNCOBJ,
2362 ANV_FENCE_TYPE_WSI,
2363 };
2364
2365 enum anv_bo_fence_state {
2366   /** Indicates that this is a new (or newly reset) fence */
2367 ANV_BO_FENCE_STATE_RESET,
2368
2369 /** Indicates that this fence has been submitted to the GPU but is still
2370 * (as far as we know) in use by the GPU.
2371 */
2372 ANV_BO_FENCE_STATE_SUBMITTED,
2373
2374 ANV_BO_FENCE_STATE_SIGNALED,
2375 };
2376
2377 struct anv_fence_impl {
2378 enum anv_fence_type type;
2379
2380 union {
2381 /** Fence implementation for BO fences
2382 *
2383 * These fences use a BO and a set of CPU-tracked state flags. The BO
2384 * is added to the object list of the last execbuf call in a QueueSubmit
2385 * and is marked EXEC_WRITE. The state flags track when the BO has been
2386     * is marked EXEC_OBJECT_WRITE. The state flags track when the BO has been
2387 * wait on a fence that has not yet been submitted and I915_GEM_BUSY
2388 * will say it's idle in this case.
2389 */
2390 struct {
2391 struct anv_bo bo;
2392 enum anv_bo_fence_state state;
2393 } bo;
2394
2395 /** DRM syncobj handle for syncobj-based fences */
2396 uint32_t syncobj;
2397
2398 /** WSI fence */
2399 struct wsi_fence *fence_wsi;
2400 };
2401 };
2402
2403 struct anv_fence {
2404 /* Permanent fence state. Every fence has some form of permanent state
2405    * (type != ANV_FENCE_TYPE_NONE). This may be a BO to fence on (for
2406 * cross-process fences) or it could just be a dummy for use internally.
2407 */
2408 struct anv_fence_impl permanent;
2409
2410 /* Temporary fence state. A fence *may* have temporary state. That state
2411 * is added to the fence by an import operation and is reset back to
2412    * ANV_FENCE_TYPE_NONE when the fence is reset. A fence with temporary
2413 * state cannot be signaled because the fence must already be signaled
2414 * before the temporary state can be exported from the fence in the other
2415 * process and imported here.
2416 */
2417 struct anv_fence_impl temporary;
2418 };
2419
2420 struct anv_event {
2421 uint64_t semaphore;
2422 struct anv_state state;
2423 };
2424
2425 enum anv_semaphore_type {
2426 ANV_SEMAPHORE_TYPE_NONE = 0,
2427 ANV_SEMAPHORE_TYPE_DUMMY,
2428 ANV_SEMAPHORE_TYPE_BO,
2429 ANV_SEMAPHORE_TYPE_SYNC_FILE,
2430 ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
2431 };
2432
2433 struct anv_semaphore_impl {
2434 enum anv_semaphore_type type;
2435
2436 union {
2437 /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO.
2438 * This BO will be added to the object list on any execbuf2 calls for
2439 * which this semaphore is used as a wait or signal fence. When used as
2440 * a signal fence, the EXEC_OBJECT_WRITE flag will be set.
2441 */
2442 struct anv_bo *bo;
2443
2444 /* The sync file descriptor when type == ANV_SEMAPHORE_TYPE_SYNC_FILE.
2445 * If the semaphore is in the unsignaled state due to either just being
2446 * created or because it has been used for a wait, fd will be -1.
2447 */
2448 int fd;
2449
2450 /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
2451 * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
2452 * import so we don't need to bother with a userspace cache.
2453 */
2454 uint32_t syncobj;
2455 };
2456 };
2457
2458 struct anv_semaphore {
2459 /* Permanent semaphore state. Every semaphore has some form of permanent
2460 * state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
2461    * (for cross-process semaphores) or it could just be a dummy for use
2462 * internally.
2463 */
2464 struct anv_semaphore_impl permanent;
2465
2466 /* Temporary semaphore state. A semaphore *may* have temporary state.
2467 * That state is added to the semaphore by an import operation and is reset
2468 * back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on. A
2469 * semaphore with temporary state cannot be signaled because the semaphore
2470 * must already be signaled before the temporary state can be exported from
2471 * the semaphore in the other process and imported here.
2472 */
2473 struct anv_semaphore_impl temporary;
2474 };
2475
2476 void anv_semaphore_reset_temporary(struct anv_device *device,
2477 struct anv_semaphore *semaphore);
2478
2479 struct anv_shader_module {
2480 unsigned char sha1[20];
2481 uint32_t size;
2482 char data[0];
2483 };
2484
2485 static inline gl_shader_stage
2486 vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
2487 {
2488 assert(__builtin_popcount(vk_stage) == 1);
2489 return ffs(vk_stage) - 1;
2490 }
2491
2492 static inline VkShaderStageFlagBits
2493 mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
2494 {
2495 return (1 << mesa_stage);
2496 }
2497
2498 #define ANV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
2499
2500 #define anv_foreach_stage(stage, stage_bits) \
2501 for (gl_shader_stage stage, \
2502 __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
2503 stage = __builtin_ffs(__tmp) - 1, __tmp; \
2504 __tmp &= ~(1 << (stage)))
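
/* Illustrative usage (sketch): iterate only the stages whose descriptors are
 * marked dirty in the command buffer state, with whatever per-stage re-emit
 * work the call site needs:
 *
 *    anv_foreach_stage(s, cmd_buffer->state.descriptors_dirty) {
 *       ... re-emit binding tables and samplers for stage s ...
 *    }
 */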
2505
2506 struct anv_pipeline_bind_map {
2507 uint32_t surface_count;
2508 uint32_t sampler_count;
2509 uint32_t image_count;
2510
2511 struct anv_pipeline_binding * surface_to_descriptor;
2512 struct anv_pipeline_binding * sampler_to_descriptor;
2513 };
2514
2515 struct anv_shader_bin_key {
2516 uint32_t size;
2517 uint8_t data[0];
2518 };
2519
2520 struct anv_shader_bin {
2521 uint32_t ref_cnt;
2522
2523 const struct anv_shader_bin_key *key;
2524
2525 struct anv_state kernel;
2526 uint32_t kernel_size;
2527
2528 struct anv_state constant_data;
2529 uint32_t constant_data_size;
2530
2531 const struct brw_stage_prog_data *prog_data;
2532 uint32_t prog_data_size;
2533
2534 struct nir_xfb_info *xfb_info;
2535
2536 struct anv_pipeline_bind_map bind_map;
2537 };
2538
2539 struct anv_shader_bin *
2540 anv_shader_bin_create(struct anv_device *device,
2541 const void *key, uint32_t key_size,
2542 const void *kernel, uint32_t kernel_size,
2543 const void *constant_data, uint32_t constant_data_size,
2544 const struct brw_stage_prog_data *prog_data,
2545 uint32_t prog_data_size, const void *prog_data_param,
2546 const struct nir_xfb_info *xfb_info,
2547 const struct anv_pipeline_bind_map *bind_map);
2548
2549 void
2550 anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);
2551
2552 static inline void
2553 anv_shader_bin_ref(struct anv_shader_bin *shader)
2554 {
2555 assert(shader && shader->ref_cnt >= 1);
2556 p_atomic_inc(&shader->ref_cnt);
2557 }
2558
2559 static inline void
2560 anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
2561 {
2562 assert(shader && shader->ref_cnt >= 1);
2563 if (p_atomic_dec_zero(&shader->ref_cnt))
2564 anv_shader_bin_destroy(device, shader);
2565 }
2566
2567 struct anv_pipeline {
2568 struct anv_device * device;
2569 struct anv_batch batch;
2570 uint32_t batch_data[512];
2571 struct anv_reloc_list batch_relocs;
2572 uint32_t dynamic_state_mask;
2573 struct anv_dynamic_state dynamic_state;
2574
2575 struct anv_subpass * subpass;
2576
2577 bool needs_data_cache;
2578
2579 struct anv_shader_bin * shaders[MESA_SHADER_STAGES];
2580
2581 struct {
2582 const struct gen_l3_config * l3_config;
2583 uint32_t total_size;
2584 } urb;
2585
2586 VkShaderStageFlags active_stages;
2587 struct anv_state blend_state;
2588
2589 uint32_t vb_used;
2590 struct anv_pipeline_vertex_binding {
2591 uint32_t stride;
2592 bool instanced;
2593 uint32_t instance_divisor;
2594 } vb[MAX_VBS];
2595
2596 uint8_t xfb_used;
2597
2598 bool primitive_restart;
2599 uint32_t topology;
2600
2601 uint32_t cs_right_mask;
2602
2603 bool writes_depth;
2604 bool depth_test_enable;
2605 bool writes_stencil;
2606 bool stencil_test_enable;
2607 bool depth_clamp_enable;
2608 bool sample_shading_enable;
2609 bool kill_pixel;
2610
2611 struct {
2612 uint32_t sf[7];
2613 uint32_t depth_stencil_state[3];
2614 } gen7;
2615
2616 struct {
2617 uint32_t sf[4];
2618 uint32_t raster[5];
2619 uint32_t wm_depth_stencil[3];
2620 } gen8;
2621
2622 struct {
2623 uint32_t wm_depth_stencil[4];
2624 } gen9;
2625
2626 uint32_t interface_descriptor_data[8];
2627 };
2628
2629 static inline bool
2630 anv_pipeline_has_stage(const struct anv_pipeline *pipeline,
2631 gl_shader_stage stage)
2632 {
2633 return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
2634 }
2635
2636 #define ANV_DECL_GET_PROG_DATA_FUNC(prefix, stage) \
2637 static inline const struct brw_##prefix##_prog_data * \
2638 get_##prefix##_prog_data(const struct anv_pipeline *pipeline) \
2639 { \
2640 if (anv_pipeline_has_stage(pipeline, stage)) { \
2641 return (const struct brw_##prefix##_prog_data *) \
2642 pipeline->shaders[stage]->prog_data; \
2643 } else { \
2644 return NULL; \
2645 } \
2646 }
2647
2648 ANV_DECL_GET_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
2649 ANV_DECL_GET_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
2650 ANV_DECL_GET_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
2651 ANV_DECL_GET_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
2652 ANV_DECL_GET_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)
2653 ANV_DECL_GET_PROG_DATA_FUNC(cs, MESA_SHADER_COMPUTE)
2654
2655 static inline const struct brw_vue_prog_data *
2656 anv_pipeline_get_last_vue_prog_data(const struct anv_pipeline *pipeline)
2657 {
2658 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
2659 return &get_gs_prog_data(pipeline)->base;
2660 else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2661 return &get_tes_prog_data(pipeline)->base;
2662 else
2663 return &get_vs_prog_data(pipeline)->base;
2664 }
2665
2666 VkResult
2667 anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device,
2668 struct anv_pipeline_cache *cache,
2669 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2670 const VkAllocationCallbacks *alloc);
2671
2672 VkResult
2673 anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
2674 struct anv_pipeline_cache *cache,
2675 const VkComputePipelineCreateInfo *info,
2676 const struct anv_shader_module *module,
2677 const char *entrypoint,
2678 const VkSpecializationInfo *spec_info);
2679
2680 struct anv_format_plane {
2681 enum isl_format isl_format:16;
2682 struct isl_swizzle swizzle;
2683
2684 /* Whether this plane contains chroma channels */
2685 bool has_chroma;
2686
2687 /* For downscaling of YUV planes */
2688 uint8_t denominator_scales[2];
2689
2690 /* How to map sampled ycbcr planes to a single 4 component element. */
2691 struct isl_swizzle ycbcr_swizzle;
2692
2693    /* Which aspect is associated with this plane */
2694 VkImageAspectFlags aspect;
2695 };
2696
2697
2698 struct anv_format {
2699 struct anv_format_plane planes[3];
2700 VkFormat vk_format;
2701 uint8_t n_planes;
2702 bool can_ycbcr;
2703 };
2704
2705 static inline uint32_t
2706 anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,
2707 VkImageAspectFlags aspect_mask)
2708 {
2709 switch (aspect_mask) {
2710 case VK_IMAGE_ASPECT_COLOR_BIT:
2711 case VK_IMAGE_ASPECT_DEPTH_BIT:
2712 case VK_IMAGE_ASPECT_PLANE_0_BIT:
2713 return 0;
2714 case VK_IMAGE_ASPECT_STENCIL_BIT:
2715 if ((image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
2716 return 0;
2717 /* Fall-through */
2718 case VK_IMAGE_ASPECT_PLANE_1_BIT:
2719 return 1;
2720 case VK_IMAGE_ASPECT_PLANE_2_BIT:
2721 return 2;
2722 default:
2723 /* Purposefully assert with depth/stencil aspects. */
2724 unreachable("invalid image aspect");
2725 }
2726 }
2727
2728 static inline VkImageAspectFlags
2729 anv_plane_to_aspect(VkImageAspectFlags image_aspects,
2730 uint32_t plane)
2731 {
2732 if (image_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
2733 if (util_bitcount(image_aspects) > 1)
2734 return VK_IMAGE_ASPECT_PLANE_0_BIT << plane;
2735 return VK_IMAGE_ASPECT_COLOR_BIT;
2736 }
2737 if (image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
2738 return VK_IMAGE_ASPECT_DEPTH_BIT << plane;
2739 assert(image_aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
2740 return VK_IMAGE_ASPECT_STENCIL_BIT;
2741 }
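
/* Worked example (illustrative): for a two-plane image whose aspects are
 * VK_IMAGE_ASPECT_PLANE_0_BIT | VK_IMAGE_ASPECT_PLANE_1_BIT,
 *
 *    anv_image_aspect_to_plane(aspects, VK_IMAGE_ASPECT_PLANE_1_BIT) == 1
 *    anv_plane_to_aspect(aspects, 1) == VK_IMAGE_ASPECT_PLANE_1_BIT
 *
 * while for a combined depth/stencil image (DEPTH | STENCIL), plane 0 is the
 * depth surface and plane 1 is the stencil surface.
 */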
2742
2743 #define anv_foreach_image_aspect_bit(b, image, aspects) \
2744 for_each_bit(b, anv_image_expand_aspects(image, aspects))
2745
2746 const struct anv_format *
2747 anv_get_format(VkFormat format);
2748
2749 static inline uint32_t
2750 anv_get_format_planes(VkFormat vk_format)
2751 {
2752 const struct anv_format *format = anv_get_format(vk_format);
2753
2754 return format != NULL ? format->n_planes : 0;
2755 }
2756
2757 struct anv_format_plane
2758 anv_get_format_plane(const struct gen_device_info *devinfo, VkFormat vk_format,
2759 VkImageAspectFlagBits aspect, VkImageTiling tiling);
2760
2761 static inline enum isl_format
2762 anv_get_isl_format(const struct gen_device_info *devinfo, VkFormat vk_format,
2763 VkImageAspectFlags aspect, VkImageTiling tiling)
2764 {
2765 return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format;
2766 }
2767
2768 static inline struct isl_swizzle
2769 anv_swizzle_for_render(struct isl_swizzle swizzle)
2770 {
2771 /* Sometimes the swizzle will have alpha map to one. We do this to fake
2772 * RGB as RGBA for texturing
2773 */
2774 assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
2775 swizzle.a == ISL_CHANNEL_SELECT_ALPHA);
2776
2777 /* But it doesn't matter what we render to that channel */
2778 swizzle.a = ISL_CHANNEL_SELECT_ALPHA;
2779
2780 return swizzle;
2781 }
2782
2783 void
2784 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);
2785
2786 /**
2787 * Subsurface of an anv_image.
2788 */
2789 struct anv_surface {
2790 /** Valid only if isl_surf::size_B > 0. */
2791 struct isl_surf isl;
2792
2793 /**
2794 * Offset from VkImage's base address, as bound by vkBindImageMemory().
2795 */
2796 uint32_t offset;
2797 };
2798
2799 struct anv_image {
2800 VkImageType type;
2801 /* The original VkFormat provided by the client. This may not match any
2802 * of the actual surface formats.
2803 */
2804 VkFormat vk_format;
2805 const struct anv_format *format;
2806
2807 VkImageAspectFlags aspects;
2808 VkExtent3D extent;
2809 uint32_t levels;
2810 uint32_t array_size;
2811 uint32_t samples; /**< VkImageCreateInfo::samples */
2812 uint32_t n_planes;
2813 VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
2814 VkImageCreateFlags create_flags; /* Flags used when creating image. */
2815    VkImageTiling tiling; /**< VkImageCreateInfo::tiling */
2816
2817    /** True if this needs to be bound to an appropriately tiled BO.
2818 *
2819 * When not using modifiers, consumers such as X11, Wayland, and KMS need
2820 * the tiling passed via I915_GEM_SET_TILING. When exporting these buffers
2821 * we require a dedicated allocation so that we can know to allocate a
2822 * tiled buffer.
2823 */
2824 bool needs_set_tiling;
2825
2826 /**
2827 * Must be DRM_FORMAT_MOD_INVALID unless tiling is
2828 * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
2829 */
2830 uint64_t drm_format_mod;
2831
2832 VkDeviceSize size;
2833 uint32_t alignment;
2834
2835    /* Whether the image is made of several underlying buffer objects rather
2836     * than a single one with different offsets.
2837 */
2838 bool disjoint;
2839
2840 /* All the formats that can be used when creating views of this image
2841 * are CCS_E compatible.
2842 */
2843 bool ccs_e_compatible;
2844
2845 /* Image was created with external format. */
2846 bool external_format;
2847
2848 /**
2849 * Image subsurfaces
2850 *
2851    * For each plane x, anv_image::planes[x].surface is valid if and only if
2852    * anv_image::aspects contains the corresponding aspect. Refer to
2853    * anv_image_aspect_to_plane() to find the plane number for a given aspect.
2854 *
2855 * The hardware requires that the depth buffer and stencil buffer be
2856 * separate surfaces. From Vulkan's perspective, though, depth and stencil
2857 * reside in the same VkImage. To satisfy both the hardware and Vulkan, we
2858 * allocate the depth and stencil buffers as separate surfaces in the same
2859 * bo.
2860 *
2861 * Memory layout :
2862 *
2863 * -----------------------
2864 * | surface0 | /|\
2865 * ----------------------- |
2866 * | shadow surface0 | |
2867 * ----------------------- | Plane 0
2868 * | aux surface0 | |
2869 * ----------------------- |
2870 * | fast clear colors0 | \|/
2871 * -----------------------
2872 * | surface1 | /|\
2873 * ----------------------- |
2874 * | shadow surface1 | |
2875 * ----------------------- | Plane 1
2876 * | aux surface1 | |
2877 * ----------------------- |
2878 * | fast clear colors1 | \|/
2879 * -----------------------
2880 * | ... |
2881 * | |
2882 * -----------------------
2883 */
2884 struct {
2885 /**
2886 * Offset of the entire plane (whenever the image is disjoint this is
2887 * set to 0).
2888 */
2889 uint32_t offset;
2890
2891 VkDeviceSize size;
2892 uint32_t alignment;
2893
2894 struct anv_surface surface;
2895
2896 /**
2897 * A surface which shadows the main surface and may have different
2898 * tiling. This is used for sampling using a tiling that isn't supported
2899 * for other operations.
2900 */
2901 struct anv_surface shadow_surface;
2902
2903 /**
2904 * For color images, this is the aux usage for this image when not used
2905 * as a color attachment.
2906 *
2907 * For depth/stencil images, this is set to ISL_AUX_USAGE_HIZ if the
2908 * image has a HiZ buffer.
2909 */
2910 enum isl_aux_usage aux_usage;
2911
2912 struct anv_surface aux_surface;
2913
2914 /**
2915 * Offset of the fast clear state (used to compute the
2916 * fast_clear_state_offset of the following planes).
2917 */
2918 uint32_t fast_clear_state_offset;
2919
2920 /**
2921 * BO associated with this plane, set when bound.
2922 */
2923 struct anv_address address;
2924
2925 /**
2926 * When destroying the image, also free the bo.
2927       */
2928 bool bo_is_owned;
2929 } planes[3];
2930 };
2931
2932 /* The ordering of this enum is important */
2933 enum anv_fast_clear_type {
2934 /** Image does not have/support any fast-clear blocks */
2935 ANV_FAST_CLEAR_NONE = 0,
2936 /** Image has/supports fast-clear but only to the default value */
2937 ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
2938 /** Image has/supports fast-clear with an arbitrary fast-clear value */
2939 ANV_FAST_CLEAR_ANY = 2,
2940 };
2941
2942 /* Returns the number of auxiliary buffer levels attached to an image. */
2943 static inline uint8_t
2944 anv_image_aux_levels(const struct anv_image * const image,
2945 VkImageAspectFlagBits aspect)
2946 {
2947 uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
2948 return image->planes[plane].aux_surface.isl.size_B > 0 ?
2949 image->planes[plane].aux_surface.isl.levels : 0;
2950 }
2951
2952 /* Returns the number of auxiliary buffer layers attached to an image. */
2953 static inline uint32_t
2954 anv_image_aux_layers(const struct anv_image * const image,
2955 VkImageAspectFlagBits aspect,
2956 const uint8_t miplevel)
2957 {
2958 assert(image);
2959
2960 /* The miplevel must exist in the main buffer. */
2961 assert(miplevel < image->levels);
2962
2963 if (miplevel >= anv_image_aux_levels(image, aspect)) {
2964 /* There are no layers with auxiliary data because the miplevel has no
2965 * auxiliary data.
2966 */
2967 return 0;
2968 } else {
2969 uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
2970 return MAX2(image->planes[plane].aux_surface.isl.logical_level0_px.array_len,
2971 image->planes[plane].aux_surface.isl.logical_level0_px.depth >> miplevel);
2972 }
2973 }
2974
2975 static inline struct anv_address
2976 anv_image_get_clear_color_addr(const struct anv_device *device,
2977 const struct anv_image *image,
2978 VkImageAspectFlagBits aspect)
2979 {
2980 assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
2981
2982 uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
2983 return anv_address_add(image->planes[plane].address,
2984 image->planes[plane].fast_clear_state_offset);
2985 }
2986
2987 static inline struct anv_address
2988 anv_image_get_fast_clear_type_addr(const struct anv_device *device,
2989 const struct anv_image *image,
2990 VkImageAspectFlagBits aspect)
2991 {
2992 struct anv_address addr =
2993 anv_image_get_clear_color_addr(device, image, aspect);
2994
2995 const unsigned clear_color_state_size = device->info.gen >= 10 ?
2996 device->isl_dev.ss.clear_color_state_size :
2997 device->isl_dev.ss.clear_value_size;
2998 return anv_address_add(addr, clear_color_state_size);
2999 }
3000
3001 static inline struct anv_address
3002 anv_image_get_compression_state_addr(const struct anv_device *device,
3003 const struct anv_image *image,
3004 VkImageAspectFlagBits aspect,
3005 uint32_t level, uint32_t array_layer)
3006 {
3007 assert(level < anv_image_aux_levels(image, aspect));
3008 assert(array_layer < anv_image_aux_layers(image, aspect, level));
3009 UNUSED uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
3010 assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);
3011
3012 struct anv_address addr =
3013 anv_image_get_fast_clear_type_addr(device, image, aspect);
3014 addr.offset += 4; /* Go past the fast clear type */
3015
3016 if (image->type == VK_IMAGE_TYPE_3D) {
3017 for (uint32_t l = 0; l < level; l++)
3018 addr.offset += anv_minify(image->extent.depth, l) * 4;
3019 } else {
3020 addr.offset += level * image->array_size * 4;
3021 }
3022 addr.offset += array_layer * 4;
3023
3024 return addr;
3025 }
3026
3027 /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
3028 static inline bool
3029 anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
3030 const struct anv_image *image)
3031 {
3032 if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
3033 return false;
3034
3035 if (devinfo->gen < 8)
3036 return false;
3037
3038 return image->samples == 1;
3039 }
3040
3041 void
3042 anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
3043 const struct anv_image *image,
3044 VkImageAspectFlagBits aspect,
3045 enum isl_aux_usage aux_usage,
3046 uint32_t level,
3047 uint32_t base_layer,
3048 uint32_t layer_count);
3049
3050 void
3051 anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
3052 const struct anv_image *image,
3053 VkImageAspectFlagBits aspect,
3054 enum isl_aux_usage aux_usage,
3055 enum isl_format format, struct isl_swizzle swizzle,
3056 uint32_t level, uint32_t base_layer, uint32_t layer_count,
3057 VkRect2D area, union isl_color_value clear_color);
3058 void
3059 anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3060 const struct anv_image *image,
3061 VkImageAspectFlags aspects,
3062 enum isl_aux_usage depth_aux_usage,
3063 uint32_t level,
3064 uint32_t base_layer, uint32_t layer_count,
3065 VkRect2D area,
3066 float depth_value, uint8_t stencil_value);
3067 void
3068 anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
3069 const struct anv_image *src_image,
3070 enum isl_aux_usage src_aux_usage,
3071 uint32_t src_level, uint32_t src_base_layer,
3072 const struct anv_image *dst_image,
3073 enum isl_aux_usage dst_aux_usage,
3074 uint32_t dst_level, uint32_t dst_base_layer,
3075 VkImageAspectFlagBits aspect,
3076 uint32_t src_x, uint32_t src_y,
3077 uint32_t dst_x, uint32_t dst_y,
3078 uint32_t width, uint32_t height,
3079 uint32_t layer_count,
3080 enum blorp_filter filter);
3081 void
3082 anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
3083 const struct anv_image *image,
3084 VkImageAspectFlagBits aspect, uint32_t level,
3085 uint32_t base_layer, uint32_t layer_count,
3086 enum isl_aux_op hiz_op);
3087 void
3088 anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
3089 const struct anv_image *image,
3090 VkImageAspectFlags aspects,
3091 uint32_t level,
3092 uint32_t base_layer, uint32_t layer_count,
3093 VkRect2D area, uint8_t stencil_value);
3094 void
3095 anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
3096 const struct anv_image *image,
3097 enum isl_format format,
3098 VkImageAspectFlagBits aspect,
3099 uint32_t base_layer, uint32_t layer_count,
3100 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
3101 bool predicate);
3102 void
3103 anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
3104 const struct anv_image *image,
3105 enum isl_format format,
3106 VkImageAspectFlagBits aspect, uint32_t level,
3107 uint32_t base_layer, uint32_t layer_count,
3108 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
3109 bool predicate);
3110
3111 void
3112 anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
3113 const struct anv_image *image,
3114 uint32_t base_level, uint32_t level_count,
3115 uint32_t base_layer, uint32_t layer_count);
3116
3117 enum isl_aux_usage
3118 anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
3119 const struct anv_image *image,
3120 const VkImageAspectFlagBits aspect,
3121 const VkImageLayout layout);
3122
3123 enum anv_fast_clear_type
3124 anv_layout_to_fast_clear_type(const struct gen_device_info * const devinfo,
3125 const struct anv_image * const image,
3126 const VkImageAspectFlagBits aspect,
3127 const VkImageLayout layout);
3128
3129 /* This is defined as a macro so that it works for both
3130 * VkImageSubresourceRange and VkImageSubresourceLayers
3131 */
3132 #define anv_get_layerCount(_image, _range) \
3133 ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
3134 (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)
3135
3136 static inline uint32_t
3137 anv_get_levelCount(const struct anv_image *image,
3138 const VkImageSubresourceRange *range)
3139 {
3140 return range->levelCount == VK_REMAINING_MIP_LEVELS ?
3141 image->levels - range->baseMipLevel : range->levelCount;
3142 }
3143
3144 static inline VkImageAspectFlags
3145 anv_image_expand_aspects(const struct anv_image *image,
3146 VkImageAspectFlags aspects)
3147 {
3148 /* If the underlying image has color plane aspects and
3149 * VK_IMAGE_ASPECT_COLOR_BIT has been requested, then return the aspects of
3150 * the underlying image. */
3151 if ((image->aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) != 0 &&
3152 aspects == VK_IMAGE_ASPECT_COLOR_BIT)
3153 return image->aspects;
3154
3155 return aspects;
3156 }
3157
3158 static inline bool
3159 anv_image_aspects_compatible(VkImageAspectFlags aspects1,
3160 VkImageAspectFlags aspects2)
3161 {
3162 if (aspects1 == aspects2)
3163 return true;
3164
3165    /* Only color aspects with the same number of planes are compatible. */
3166 if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3167 (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
3168 util_bitcount(aspects1) == util_bitcount(aspects2))
3169 return true;
3170
3171 return false;
3172 }
3173
3174 struct anv_image_view {
3175 const struct anv_image *image; /**< VkImageViewCreateInfo::image */
3176
3177 VkImageAspectFlags aspect_mask;
3178 VkFormat vk_format;
3179 VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
3180
3181 unsigned n_planes;
3182 struct {
3183 uint32_t image_plane;
3184
3185 struct isl_view isl;
3186
3187 /**
3188 * RENDER_SURFACE_STATE when using image as a sampler surface with an
3189 * image layout of SHADER_READ_ONLY_OPTIMAL or
3190 * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
3191 */
3192 struct anv_surface_state optimal_sampler_surface_state;
3193
3194 /**
3195 * RENDER_SURFACE_STATE when using image as a sampler surface with an
3196 * image layout of GENERAL.
3197 */
3198 struct anv_surface_state general_sampler_surface_state;
3199
3200 /**
3201 * RENDER_SURFACE_STATE when using image as a storage image. Separate
3202 * states for write-only and readable, using the real format for
3203 * write-only and the lowered format for readable.
3204 */
3205 struct anv_surface_state storage_surface_state;
3206 struct anv_surface_state writeonly_storage_surface_state;
3207
3208 struct brw_image_param storage_image_param;
3209 } planes[3];
3210 };
3211
3212 enum anv_image_view_state_flags {
3213 ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY = (1 << 0),
3214 ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL = (1 << 1),
3215 };
3216
3217 void anv_image_fill_surface_state(struct anv_device *device,
3218 const struct anv_image *image,
3219 VkImageAspectFlagBits aspect,
3220 const struct isl_view *view,
3221 isl_surf_usage_flags_t view_usage,
3222 enum isl_aux_usage aux_usage,
3223 const union isl_color_value *clear_color,
3224 enum anv_image_view_state_flags flags,
3225 struct anv_surface_state *state_inout,
3226 struct brw_image_param *image_param_out);
3227
3228 struct anv_image_create_info {
3229 const VkImageCreateInfo *vk_info;
3230
3231 /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
3232 isl_tiling_flags_t isl_tiling_flags;
3233
3234    /** These flags will be added to any usage flags derived from VkImageCreateInfo. */
3235 isl_surf_usage_flags_t isl_extra_usage_flags;
3236
3237 uint32_t stride;
3238 bool external_format;
3239 };
3240
3241 VkResult anv_image_create(VkDevice _device,
3242 const struct anv_image_create_info *info,
3243 const VkAllocationCallbacks* alloc,
3244 VkImage *pImage);
3245
3246 const struct anv_surface *
3247 anv_image_get_surface_for_aspect_mask(const struct anv_image *image,
3248 VkImageAspectFlags aspect_mask);
3249
3250 enum isl_format
3251 anv_isl_format_for_descriptor_type(VkDescriptorType type);
3252
3253 static inline struct VkExtent3D
3254 anv_sanitize_image_extent(const VkImageType imageType,
3255 const struct VkExtent3D imageExtent)
3256 {
3257 switch (imageType) {
3258 case VK_IMAGE_TYPE_1D:
3259 return (VkExtent3D) { imageExtent.width, 1, 1 };
3260 case VK_IMAGE_TYPE_2D:
3261 return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
3262 case VK_IMAGE_TYPE_3D:
3263 return imageExtent;
3264 default:
3265 unreachable("invalid image type");
3266 }
3267 }
3268
3269 static inline struct VkOffset3D
3270 anv_sanitize_image_offset(const VkImageType imageType,
3271 const struct VkOffset3D imageOffset)
3272 {
3273 switch (imageType) {
3274 case VK_IMAGE_TYPE_1D:
3275 return (VkOffset3D) { imageOffset.x, 0, 0 };
3276 case VK_IMAGE_TYPE_2D:
3277 return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
3278 case VK_IMAGE_TYPE_3D:
3279 return imageOffset;
3280 default:
3281 unreachable("invalid image type");
3282 }
3283 }
3284
3285 VkFormatFeatureFlags
3286 anv_get_image_format_features(const struct gen_device_info *devinfo,
3287 VkFormat vk_format,
3288 const struct anv_format *anv_format,
3289 VkImageTiling vk_tiling);
3290
3291 void anv_fill_buffer_surface_state(struct anv_device *device,
3292 struct anv_state state,
3293 enum isl_format format,
3294 struct anv_address address,
3295 uint32_t range, uint32_t stride);
3296
3297 static inline void
3298 anv_clear_color_from_att_state(union isl_color_value *clear_color,
3299 const struct anv_attachment_state *att_state,
3300 const struct anv_image_view *iview)
3301 {
3302 const struct isl_format_layout *view_fmtl =
3303 isl_format_get_layout(iview->planes[0].isl.format);
3304
3305 #define COPY_CLEAR_COLOR_CHANNEL(c, i) \
3306 if (view_fmtl->channels.c.bits) \
3307 clear_color->u32[i] = att_state->clear_value.color.uint32[i]
3308
3309 COPY_CLEAR_COLOR_CHANNEL(r, 0);
3310 COPY_CLEAR_COLOR_CHANNEL(g, 1);
3311 COPY_CLEAR_COLOR_CHANNEL(b, 2);
3312 COPY_CLEAR_COLOR_CHANNEL(a, 3);
3313
3314 #undef COPY_CLEAR_COLOR_CHANNEL
3315 }
3316
3317
3318 struct anv_ycbcr_conversion {
3319 const struct anv_format * format;
3320 VkSamplerYcbcrModelConversion ycbcr_model;
3321 VkSamplerYcbcrRange ycbcr_range;
3322 VkComponentSwizzle mapping[4];
3323 VkChromaLocation chroma_offsets[2];
3324 VkFilter chroma_filter;
3325 bool chroma_reconstruction;
3326 };
3327
3328 struct anv_sampler {
3329 uint32_t state[3][4];
3330 uint32_t n_planes;
3331 struct anv_ycbcr_conversion *conversion;
3332 };
3333
3334 struct anv_framebuffer {
3335 uint32_t width;
3336 uint32_t height;
3337 uint32_t layers;
3338
3339 uint32_t attachment_count;
3340 struct anv_image_view * attachments[0];
3341 };
3342
3343 struct anv_subpass_attachment {
3344 VkImageUsageFlagBits usage;
3345 uint32_t attachment;
3346 VkImageLayout layout;
3347 };
3348
3349 struct anv_subpass {
3350 uint32_t attachment_count;
3351
3352 /**
3353 * A pointer to all attachment references used in this subpass.
3354 * Only valid if ::attachment_count > 0.
3355 */
3356 struct anv_subpass_attachment * attachments;
3357 uint32_t input_count;
3358 struct anv_subpass_attachment * input_attachments;
3359 uint32_t color_count;
3360 struct anv_subpass_attachment * color_attachments;
3361 struct anv_subpass_attachment * resolve_attachments;
3362
3363 struct anv_subpass_attachment * depth_stencil_attachment;
3364 struct anv_subpass_attachment * ds_resolve_attachment;
3365 VkResolveModeFlagBitsKHR depth_resolve_mode;
3366 VkResolveModeFlagBitsKHR stencil_resolve_mode;
3367
3368 uint32_t view_mask;
3369
3370 /** Subpass has a depth/stencil self-dependency */
3371 bool has_ds_self_dep;
3372
3373 /** Subpass has at least one color resolve attachment */
3374 bool has_color_resolve;
3375 };
3376
static inline unsigned
anv_subpass_view_count(const struct anv_subpass *subpass)
{
   return MAX2(1, util_bitcount(subpass->view_mask));
}

struct anv_render_pass_attachment {
   /* TODO: Consider using VkAttachmentDescription instead of storing each of
    * its members individually.
    */
   VkFormat                                     format;
   uint32_t                                     samples;
   VkImageUsageFlags                            usage;
   VkAttachmentLoadOp                           load_op;
   VkAttachmentStoreOp                          store_op;
   VkAttachmentLoadOp                           stencil_load_op;
   VkImageLayout                                initial_layout;
   VkImageLayout                                final_layout;
   VkImageLayout                                first_subpass_layout;

   /* The id of the last subpass in which the attachment is used. */
   uint32_t                                     last_subpass_idx;
};

struct anv_render_pass {
   uint32_t                                     attachment_count;
   uint32_t                                     subpass_count;
   /* An array of subpass_count+1 flushes, one per subpass boundary */
   enum anv_pipe_bits *                         subpass_flushes;
   struct anv_render_pass_attachment *          attachments;
   struct anv_subpass                           subpasses[0];
};

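/* Mask covering all 11 VkQueryPipelineStatisticFlagBits defined by Vulkan. */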
#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff

struct anv_query_pool {
   VkQueryType                                  type;
   VkQueryPipelineStatisticFlags                pipeline_statistics;
   /** Stride between slots, in bytes */
   uint32_t                                     stride;
   /** Number of slots in this query pool */
   uint32_t                                     slots;
   struct anv_bo                                bo;
};

int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);

bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
                                   const struct anv_instance_extension_table *instance);

bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                 const struct anv_instance_extension_table *instance,
                                 const struct anv_device_extension_table *device);

void *anv_lookup_entrypoint(const struct gen_device_info *devinfo,
                            const char *name);

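/* Debugging helpers that dump images and framebuffers to PPM files. */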
void anv_dump_image_to_ppm(struct anv_device *device,
                           struct anv_image *image, unsigned miplevel,
                           unsigned array_layer, VkImageAspectFlagBits aspect,
                           const char *filename);

enum anv_dump_action {
   ANV_DUMP_FRAMEBUFFERS_BIT = 0x1,
};

void anv_dump_start(struct anv_device *device, enum anv_dump_action actions);
void anv_dump_finish(void);

void anv_dump_add_framebuffer(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_framebuffer *fb);

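/* Return the index of the subpass currently being recorded in the given
 * command buffer state.
 */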
static inline uint32_t
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
{
   /* This function must be called from within a subpass. */
   assert(cmd_state->pass && cmd_state->subpass);

   const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;

   /* The id of this subpass shouldn't exceed the number of subpasses in this
    * render pass minus 1.
    */
   assert(subpass_id < cmd_state->pass->subpass_count);
   return subpass_id;
}

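/* Helpers that define casts between anv_* structs and their Vulkan handle
 * types.  Dispatchable handles are plain pointers, so the cast is direct;
 * non-dispatchable handle casts go through uintptr_t since those handle types
 * are not necessarily pointer types.
 */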
#define ANV_DEFINE_HANDLE_CASTS(__anv_type, __VkType)              \
                                                                   \
   static inline struct __anv_type *                               \
   __anv_type ## _from_handle(__VkType _handle)                    \
   {                                                               \
      return (struct __anv_type *) _handle;                        \
   }                                                               \
                                                                   \
   static inline __VkType                                          \
   __anv_type ## _to_handle(struct __anv_type *_obj)               \
   {                                                               \
      return (__VkType) _obj;                                      \
   }

#define ANV_DEFINE_NONDISP_HANDLE_CASTS(__anv_type, __VkType)      \
                                                                   \
   static inline struct __anv_type *                               \
   __anv_type ## _from_handle(__VkType _handle)                    \
   {                                                               \
      return (struct __anv_type *)(uintptr_t) _handle;             \
   }                                                               \
                                                                   \
   static inline __VkType                                          \
   __anv_type ## _to_handle(struct __anv_type *_obj)               \
   {                                                               \
      return (__VkType)(uintptr_t) _obj;                           \
   }

#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
   struct __anv_type *__name = __anv_type ## _from_handle(__handle)

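/* Typical use of ANV_FROM_HANDLE() at the top of an entrypoint (illustrative
 * sketch only; the entrypoint name below is hypothetical):
 *
 *    VkResult anv_FrobnicateBuffer(VkDevice _device, VkBuffer _buffer)
 *    {
 *       ANV_FROM_HANDLE(anv_device, device, _device);
 *       ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
 *       ...
 *    }
 */
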
ANV_DEFINE_HANDLE_CASTS(anv_cmd_buffer, VkCommandBuffer)
ANV_DEFINE_HANDLE_CASTS(anv_device, VkDevice)
ANV_DEFINE_HANDLE_CASTS(anv_instance, VkInstance)
ANV_DEFINE_HANDLE_CASTS(anv_physical_device, VkPhysicalDevice)
ANV_DEFINE_HANDLE_CASTS(anv_queue, VkQueue)

ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, VkCommandPool)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, VkBuffer)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, VkBufferView)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, VkDescriptorPool)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, VkDescriptorSet)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, VkDescriptorSetLayout)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, VkDescriptorUpdateTemplate)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, VkDeviceMemory)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, VkFence)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_event, VkEvent)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, VkFramebuffer)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image, VkImage)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, VkImageView)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, VkPipelineCache)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, VkPipeline)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, VkPipelineLayout)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, VkQueryPool)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, VkRenderPass)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, VkSampler)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, VkSemaphore)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_shader_module, VkShaderModule)
ANV_DEFINE_NONDISP_HANDLE_CASTS(vk_debug_report_callback, VkDebugReportCallbackEXT)
ANV_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, VkSamplerYcbcrConversion)

/* Gen-specific function declarations */
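/*
 * When compiling a per-gen file, genX is already defined and the prototypes
 * are declared once for that generation.  Otherwise, the header is included
 * once per supported generation so that every genN_* prototype is declared.
 */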
#ifdef genX
#  include "anv_genX.h"
#else
#  define genX(x) gen7_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gen75_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gen8_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gen9_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gen10_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gen11_##x
#  include "anv_genX.h"
#  undef genX
#endif

#endif /* ANV_PRIVATE_H */