2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
37 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
42 #include "brw_device_info.h"
43 #include "util/macros.h"
46 #include <vulkan/vulkan.h>
47 #include <vulkan/vulkan_intel.h>
48 #include <vulkan/vk_wsi_lunarg.h>
50 #include "entrypoints.h"
52 #include "brw_context.h"
58 #define anv_noreturn __attribute__((__noreturn__))
59 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
61 #define MAX(a, b) ((a) > (b) ? (a) : (b))
/**
 * Round v up to a multiple of a.
 *
 * The result is computed by adding (a - 1) and masking those low bits off
 * again, so it is only a true multiple of a when a is a power of two; the
 * function itself does not check that.
 */
static inline uint32_t
ALIGN_U32(uint32_t v, uint32_t a)
{
   const uint32_t low_bits = a - 1;

   return (v + low_bits) & ~low_bits;
}
70 ALIGN_I32(int32_t v
, int32_t a
)
72 return (v
+ a
- 1) & ~(a
- 1);
75 /** Alignment must be a power of 2. */
77 anv_is_aligned(uintmax_t n
, uintmax_t a
)
79 assert(a
== (a
& -a
));
80 return (n
& (a
- 1)) == 0;
83 static inline uint32_t
84 anv_minify(uint32_t n
, uint32_t levels
)
89 return MAX(n
>> levels
, 1);
/*
 * Loop over the set bits of dword, lowest bit first.
 *
 * dword is evaluated once and copied into a loop-local uint32_t.  Before each
 * pass (b) is assigned __builtin_ffs(copy) - 1, i.e. the zero-based index of
 * the lowest bit still set; after the pass that bit is cleared from the copy.
 * The loop ends when the copy reaches zero, at which point (b) has been
 * assigned -1.  (b) must be an already-declared integer lvalue.
 *
 * The clearing mask is built from an unsigned 1: with a plain int literal,
 * visiting bit 31 would shift into the sign bit, which is undefined
 * behaviour.
 */
#define for_each_bit(b, dword)                          \
   for (uint32_t __dword = (dword);                     \
        (b) = __builtin_ffs(__dword) - 1, __dword;      \
        __dword &= ~(1u << (b)))
97 /* Define no kernel as 1, since that's an illegal offset for a kernel */
101 VkStructureType sType
;
105 /* Whenever we generate an error, pass it through this function. Useful for
106 * debugging, where we can break on it. Only call at error site, not when
107 * propagating errors. Might be useful to plug in a stack trace here.
110 static inline VkResult
111 vk_error(VkResult error
)
114 fprintf(stderr
, "vk_error: %x\n", error
);
120 void __anv_finishme(const char *file
, int line
, const char *format
, ...)
121 anv_printflike(3, 4);
122 void anv_loge(const char *format
, ...) anv_printflike(1, 2);
123 void anv_loge_v(const char *format
, va_list va
);
126 * Print a FINISHME message, including its source location.
/*
 * Forward a printf-style FINISHME message to __anv_finishme(), passing the
 * call site's __FILE__ and __LINE__ ahead of the format and its arguments.
 *
 * The expansion is a single do { } while (0) statement with no trailing
 * semicolon, so the ';' written by the caller terminates exactly one
 * statement.  That keeps the macro usable as the body of an unbraced
 * if/else, which a macro ending in its own ';' is not.
 */
#define anv_finishme(format, ...) \
   do { \
      __anv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
   } while (0)
131 /* A non-fatal assert. Useful for debugging. */
133 #define anv_assert(x) ({ \
134 if (unlikely(!(x))) \
135 fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
138 #define anv_assert(x)
141 void anv_abortf(const char *format
, ...) anv_noreturn
anv_printflike(1, 2);
142 void anv_abortfv(const char *format
, va_list va
) anv_noreturn
;
144 #define stub_return(v) \
146 anv_finishme("stub %s", __func__); \
152 anv_finishme("stub %s", __func__); \
157 * A dynamically growable, circular buffer. Elements are added at head and
158 * removed from tail. head and tail are free-running uint32_t indices and we
159 * only compute the modulo with size when accessing the array. This way,
160 * number of bytes in the queue is always head - tail, even in case of
167 uint32_t element_size
;
172 int anv_vector_init(struct anv_vector
*queue
, uint32_t element_size
, uint32_t size
);
173 void *anv_vector_add(struct anv_vector
*queue
);
174 void *anv_vector_remove(struct anv_vector
*queue
);
177 anv_vector_length(struct anv_vector
*queue
)
179 return (queue
->head
- queue
->tail
) / queue
->element_size
;
183 anv_vector_finish(struct anv_vector
*queue
)
188 #define anv_vector_foreach(elem, queue) \
189 static_assert(__builtin_types_compatible_p(__typeof__(queue), struct anv_vector *), ""); \
190 for (uint32_t __anv_vector_offset = (queue)->tail; \
191 elem = (queue)->data + (__anv_vector_offset & ((queue)->size - 1)), __anv_vector_offset < (queue)->head; \
192 __anv_vector_offset += (queue)->element_size)
200 /* This field is here for the benefit of the aub dumper. It can (and for
201 * userptr bos it must) be set to the cpu map of the buffer. Destroying
202 * the bo won't clean up the mmap, it's still the responsibility of the bo
203 * user to do that. */
207 /* Represents a lock-free linked list of "free" things. This is used by
208 * both the block pool and the state pools. Unfortunately, in order to
209 * solve the ABA problem, we can't use a single uint32_t head.
211 union anv_free_list
{
215 /* A simple count that is incremented every time the head changes. */
221 #define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { 1, 0 } })
223 struct anv_block_pool
{
224 struct anv_device
*device
;
232 * Array of mmaps and gem handles owned by the block pool, reclaimed when
233 * the block pool is destroyed.
235 struct anv_vector mmap_cleanups
;
240 union anv_free_list free_list
;
243 struct anv_block_state
{
259 struct anv_fixed_size_state_pool
{
261 union anv_free_list free_list
;
262 struct anv_block_state block
;
265 #define ANV_MIN_STATE_SIZE_LOG2 6
266 #define ANV_MAX_STATE_SIZE_LOG2 10
268 #define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2)
270 struct anv_state_pool
{
271 struct anv_block_pool
*block_pool
;
272 struct anv_fixed_size_state_pool buckets
[ANV_STATE_BUCKETS
];
275 struct anv_state_stream
{
276 struct anv_block_pool
*block_pool
;
278 uint32_t current_block
;
/*
 * Block pool entry points.  anv_block_pool_alloc() returns a uint32_t and
 * anv_block_pool_free() takes a uint32_t offset.
 */
void
anv_block_pool_init(struct anv_block_pool *pool,
                    struct anv_device *device, uint32_t block_size);
void
anv_block_pool_finish(struct anv_block_pool *pool);
uint32_t
anv_block_pool_alloc(struct anv_block_pool *pool);
void
anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset);

/*
 * State pool entry points.  A state pool is initialised on top of a block
 * pool and hands out struct anv_state values by size and alignment.
 */
void
anv_state_pool_init(struct anv_state_pool *pool,
                    struct anv_block_pool *block_pool);
struct anv_state
anv_state_pool_alloc(struct anv_state_pool *pool,
                     size_t state_size, size_t alignment);
void
anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);

/*
 * State stream entry points.  A stream is also initialised on top of a block
 * pool; it has an alloc call and a finish call but no per-state free.
 */
void
anv_state_stream_init(struct anv_state_stream *stream,
                      struct anv_block_pool *block_pool);
void
anv_state_stream_finish(struct anv_state_stream *stream);
struct anv_state
anv_state_stream_alloc(struct anv_state_stream *stream,
                       uint32_t size, uint32_t alignment);
299 * Implements a pool of re-usable BOs. The interface is identical to that
300 * of block_pool except that each block is its own BO.
303 struct anv_device
*device
;
310 void anv_bo_pool_init(struct anv_bo_pool
*pool
,
311 struct anv_device
*device
, uint32_t block_size
);
312 void anv_bo_pool_finish(struct anv_bo_pool
*pool
);
313 VkResult
anv_bo_pool_alloc(struct anv_bo_pool
*pool
, struct anv_bo
*bo
);
314 void anv_bo_pool_free(struct anv_bo_pool
*pool
, const struct anv_bo
*bo
);
319 typedef void (*anv_object_destructor_cb
)(struct anv_device
*,
324 anv_object_destructor_cb destructor
;
327 struct anv_physical_device
{
328 struct anv_instance
* instance
;
333 const struct brw_device_info
* info
;
336 struct anv_instance
{
337 void * pAllocUserData
;
338 PFN_vkAllocFunction pfnAlloc
;
339 PFN_vkFreeFunction pfnFree
;
341 uint32_t physicalDeviceCount
;
342 struct anv_physical_device physicalDevice
;
345 struct anv_meta_state
{
352 VkPipelineLayout pipeline_layout
;
353 VkDescriptorSetLayout ds_layout
;
357 VkDynamicRsState rs_state
;
358 VkDynamicCbState cb_state
;
359 VkDynamicDsState ds_state
;
364 struct anv_device
* device
;
366 struct anv_state_pool
* pool
;
369 * Serial number of the most recently completed batch executed on the
372 struct anv_state completed_serial
;
375 * The next batch submitted to the engine will be assigned this serial
378 uint32_t next_serial
;
380 uint32_t last_collected_serial
;
384 struct anv_instance
* instance
;
386 struct brw_device_info info
;
392 struct anv_bo_pool batch_bo_pool
;
394 struct anv_block_pool dynamic_state_block_pool
;
395 struct anv_state_pool dynamic_state_pool
;
397 struct anv_block_pool instruction_block_pool
;
398 struct anv_block_pool surface_state_block_pool
;
399 struct anv_state_pool surface_state_pool
;
401 struct anv_meta_state meta_state
;
403 struct anv_state float_border_colors
;
404 struct anv_state uint32_border_colors
;
406 struct anv_queue queue
;
408 struct anv_block_pool scratch_block_pool
;
410 struct anv_compiler
* compiler
;
411 struct anv_aub_writer
* aub_writer
;
412 pthread_mutex_t mutex
;
416 anv_device_alloc(struct anv_device
* device
,
419 VkSystemAllocType allocType
);
422 anv_device_free(struct anv_device
* device
,
/*
 * Prototypes for the anv_gem_* helpers; their definitions are not part of
 * this header.  Each helper is declared exactly once.
 *
 * NOTE(review): the handle type is not uniform across this list.
 * anv_gem_create() returns uint32_t and anv_gem_mmap() takes a uint32_t
 * gem_handle, while the remaining helpers take or return plain int.  The
 * signatures are kept as they are so they continue to match the
 * definitions; confirm whether the mix is intentional.
 */
void *anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size);
void anv_gem_munmap(void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, size_t size);
void anv_gem_close(struct anv_device *device, int gem_handle);
int anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_wait(struct anv_device *device, int gem_handle,
                 int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
                       struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, int gem_handle,
                       uint32_t stride, uint32_t tiling);
int anv_gem_create_context(struct anv_device *device);
int anv_gem_destroy_context(struct anv_device *device, int context);
int anv_gem_get_param(int fd, uint32_t param);
int anv_gem_get_aperture(struct anv_device *device, uint64_t *size);
int anv_gem_handle_to_fd(struct anv_device *device, int gem_handle);
int anv_gem_fd_to_handle(struct anv_device *device, int fd);
444 VkResult
anv_bo_init_new(struct anv_bo
*bo
, struct anv_device
*device
, uint64_t size
);
446 struct anv_reloc_list
{
449 struct drm_i915_gem_relocation_entry
* relocs
;
450 struct anv_bo
** reloc_bos
;
453 VkResult
anv_reloc_list_init(struct anv_reloc_list
*list
,
454 struct anv_device
*device
);
455 void anv_reloc_list_finish(struct anv_reloc_list
*list
,
456 struct anv_device
*device
);
458 struct anv_batch_bo
{
461 /* Bytes actually consumed in this batch BO */
464 /* These offsets reference the per-batch reloc list */
468 struct anv_batch_bo
* prev_batch_bo
;
472 struct anv_device
* device
;
478 struct anv_reloc_list relocs
;
480 /* This callback is called (with the associated user data) in the event
481 * that the batch runs out of space.
483 VkResult (*extend_cb
)(struct anv_batch
*, void *);
487 void *anv_batch_emit_dwords(struct anv_batch
*batch
, int num_dwords
);
488 void anv_batch_emit_batch(struct anv_batch
*batch
, struct anv_batch
*other
);
489 uint64_t anv_batch_emit_reloc(struct anv_batch
*batch
,
490 void *location
, struct anv_bo
*bo
, uint32_t offset
);
497 #define __gen_address_type struct anv_address
498 #define __gen_user_data struct anv_batch
500 static inline uint64_t
501 __gen_combine_address(struct anv_batch
*batch
, void *location
,
502 const struct anv_address address
, uint32_t delta
)
504 if (address
.bo
== NULL
) {
507 assert(batch
->start
<= location
&& location
< batch
->end
);
509 return anv_batch_emit_reloc(batch
, location
, address
.bo
, address
.offset
+ delta
);
513 #include "gen7_pack.h"
514 #include "gen75_pack.h"
515 #undef GEN8_3DSTATE_MULTISAMPLE
516 #include "gen8_pack.h"
518 #define anv_batch_emit(batch, cmd, ...) do { \
519 struct cmd __template = { \
523 void *__dst = anv_batch_emit_dwords(batch, cmd ## _length); \
524 cmd ## _pack(batch, __dst, &__template); \
527 #define anv_batch_emitn(batch, n, cmd, ...) ({ \
528 struct cmd __template = { \
530 .DwordLength = n - cmd ## _length_bias, \
533 void *__dst = anv_batch_emit_dwords(batch, n); \
534 cmd ## _pack(batch, __dst, &__template); \
538 #define anv_batch_emit_merge(batch, dwords0, dwords1) \
542 assert(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
543 dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0)); \
544 for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \
545 dw[i] = (dwords0)[i] | (dwords1)[i]; \
546 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4));\
549 #define GEN8_MOCS { \
550 .MemoryTypeLLCeLLCCacheabilityControl = WB, \
551 .TargetCache = L3DefertoPATforLLCeLLCselection, \
555 struct anv_device_memory
{
557 VkDeviceSize map_size
;
561 struct anv_dynamic_vp_state
{
562 struct anv_object base
;
563 struct anv_state sf_clip_vp
;
564 struct anv_state cc_vp
;
565 struct anv_state scissor
;
568 struct anv_dynamic_rs_state
{
569 uint32_t state_sf
[GEN8_3DSTATE_SF_length
];
570 uint32_t state_raster
[GEN8_3DSTATE_RASTER_length
];
573 struct anv_dynamic_ds_state
{
574 uint32_t state_wm_depth_stencil
[GEN8_3DSTATE_WM_DEPTH_STENCIL_length
];
575 uint32_t state_color_calc
[GEN8_COLOR_CALC_STATE_length
];
578 struct anv_dynamic_cb_state
{
579 uint32_t state_color_calc
[GEN8_COLOR_CALC_STATE_length
];
583 struct anv_descriptor_slot
{
588 struct anv_descriptor_set_layout
{
590 uint32_t surface_count
;
591 struct anv_descriptor_slot
*surface_start
;
592 uint32_t sampler_count
;
593 struct anv_descriptor_slot
*sampler_start
;
594 } stage
[VK_NUM_SHADER_STAGE
];
597 uint32_t num_dynamic_buffers
;
598 uint32_t shader_stages
;
599 struct anv_descriptor_slot entries
[0];
602 struct anv_descriptor
{
603 struct anv_sampler
*sampler
;
604 struct anv_surface_view
*view
;
607 struct anv_descriptor_set
{
608 struct anv_descriptor descriptors
[0];
615 struct anv_pipeline_layout
{
617 struct anv_descriptor_set_layout
*layout
;
618 uint32_t surface_start
[VK_NUM_SHADER_STAGE
];
619 uint32_t sampler_start
[VK_NUM_SHADER_STAGE
];
625 uint32_t surface_count
;
626 uint32_t sampler_count
;
627 } stage
[VK_NUM_SHADER_STAGE
];
631 struct anv_device
* device
;
639 #define ANV_CMD_BUFFER_PIPELINE_DIRTY (1 << 0)
640 #define ANV_CMD_BUFFER_RS_DIRTY (1 << 2)
641 #define ANV_CMD_BUFFER_DS_DIRTY (1 << 3)
642 #define ANV_CMD_BUFFER_CB_DIRTY (1 << 4)
643 #define ANV_CMD_BUFFER_VP_DIRTY (1 << 5)
645 struct anv_vertex_binding
{
646 struct anv_buffer
* buffer
;
650 struct anv_descriptor_set_binding
{
651 struct anv_descriptor_set
* set
;
652 uint32_t dynamic_offsets
[128];
655 struct anv_cmd_buffer
{
656 struct anv_object base
;
657 struct anv_device
* device
;
659 struct drm_i915_gem_execbuffer2 execbuf
;
660 struct drm_i915_gem_exec_object2
* exec2_objects
;
661 struct anv_bo
** exec2_bos
;
662 uint32_t exec2_array_length
;
667 struct anv_batch batch
;
668 struct anv_batch_bo
* last_batch_bo
;
669 struct anv_batch_bo
* surface_batch_bo
;
670 uint32_t surface_next
;
671 struct anv_reloc_list surface_relocs
;
672 struct anv_state_stream surface_state_stream
;
673 struct anv_state_stream dynamic_state_stream
;
675 /* State required while building cmd buffer */
676 uint32_t current_pipeline
;
679 uint32_t compute_dirty
;
680 uint32_t descriptors_dirty
;
681 uint32_t scratch_size
;
682 struct anv_pipeline
* pipeline
;
683 struct anv_pipeline
* compute_pipeline
;
684 struct anv_framebuffer
* framebuffer
;
685 struct anv_dynamic_rs_state
* rs_state
;
686 struct anv_dynamic_ds_state
* ds_state
;
687 struct anv_dynamic_vp_state
* vp_state
;
688 struct anv_dynamic_cb_state
* cb_state
;
689 struct anv_vertex_binding vertex_bindings
[MAX_VBS
];
690 struct anv_descriptor_set_binding descriptors
[MAX_SETS
];
693 void anv_cmd_buffer_dump(struct anv_cmd_buffer
*cmd_buffer
);
694 void anv_aub_writer_destroy(struct anv_aub_writer
*writer
);
697 struct anv_object base
;
699 struct drm_i915_gem_execbuffer2 execbuf
;
700 struct drm_i915_gem_exec_object2 exec2_objects
[1];
709 struct anv_pipeline
{
710 struct anv_object base
;
711 struct anv_device
* device
;
712 struct anv_batch batch
;
713 uint32_t batch_data
[256];
714 struct anv_shader
* shaders
[VK_NUM_SHADER_STAGE
];
715 struct anv_pipeline_layout
* layout
;
718 struct brw_vs_prog_data vs_prog_data
;
719 struct brw_wm_prog_data wm_prog_data
;
720 struct brw_gs_prog_data gs_prog_data
;
721 struct brw_cs_prog_data cs_prog_data
;
722 struct brw_stage_prog_data
* prog_data
[VK_NUM_SHADER_STAGE
];
723 uint32_t scratch_start
[VK_NUM_SHADER_STAGE
];
724 uint32_t total_scratch
;
728 uint32_t nr_vs_entries
;
731 uint32_t nr_gs_entries
;
734 uint32_t active_stages
;
735 struct anv_state_stream program_stream
;
736 struct anv_state blend_state
;
741 uint32_t gs_vertex_count
;
745 uint32_t binding_stride
[MAX_VBS
];
747 uint32_t state_sf
[GEN8_3DSTATE_SF_length
];
748 uint32_t state_raster
[GEN8_3DSTATE_RASTER_length
];
749 uint32_t state_wm_depth_stencil
[GEN8_3DSTATE_WM_DEPTH_STENCIL_length
];
751 uint32_t cs_thread_width_max
;
752 uint32_t cs_right_mask
;
755 struct anv_pipeline_create_info
{
757 bool disable_viewport
;
758 bool disable_scissor
;
764 anv_pipeline_create(VkDevice device
,
765 const VkGraphicsPipelineCreateInfo
*pCreateInfo
,
766 const struct anv_pipeline_create_info
*extra
,
767 VkPipeline
*pPipeline
);
769 struct anv_compiler
*anv_compiler_create(struct anv_device
*device
);
770 void anv_compiler_destroy(struct anv_compiler
*compiler
);
771 int anv_compiler_run(struct anv_compiler
*compiler
, struct anv_pipeline
*pipeline
);
772 void anv_compiler_free(struct anv_pipeline
*pipeline
);
776 uint16_t surface_format
; /**< RENDER_SURFACE_STATE.SurfaceFormat */
778 uint8_t num_channels
;
782 const struct anv_format
*
783 anv_format_for_vk_format(VkFormat format
);
794 uint32_t stencil_offset
;
795 uint32_t stencil_stride
;
801 struct anv_swap_chain
* swap_chain
;
804 * \name Alignment of miptree images, in units of pixels.
806 * These fields contain the actual alignment values, not the values the
807 * hardware expects. For example, if h_align is 4, then program the hardware
810 * \see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment
811 * \see RENDER_SURFACE_STATE.SurfaceVerticalAlignment
818 /** RENDER_SURFACE_STATE.SurfaceType */
822 struct anv_surface_view
{
823 struct anv_object base
;
825 struct anv_state surface_state
;
833 struct anv_image_create_info
{
837 VkResult
anv_image_create(VkDevice _device
,
838 const VkImageCreateInfo
*pCreateInfo
,
839 const struct anv_image_create_info
*extra
,
842 void anv_image_view_init(struct anv_surface_view
*view
,
843 struct anv_device
*device
,
844 const VkImageViewCreateInfo
* pCreateInfo
,
845 struct anv_cmd_buffer
*cmd_buffer
);
847 void anv_color_attachment_view_init(struct anv_surface_view
*view
,
848 struct anv_device
*device
,
849 const VkColorAttachmentViewCreateInfo
* pCreateInfo
,
850 struct anv_cmd_buffer
*cmd_buffer
);
852 void anv_surface_view_destroy(struct anv_device
*device
,
853 struct anv_object
*obj
, VkObjectType obj_type
);
859 struct anv_depth_stencil_view
{
862 uint32_t depth_offset
;
863 uint32_t depth_stride
;
864 uint32_t depth_format
;
866 uint32_t stencil_offset
;
867 uint32_t stencil_stride
;
870 struct anv_framebuffer
{
871 struct anv_object base
;
872 uint32_t color_attachment_count
;
873 const struct anv_surface_view
* color_attachments
[MAX_RTS
];
874 const struct anv_depth_stencil_view
* depth_stencil
;
876 uint32_t sample_count
;
881 /* Viewport for clears */
882 VkDynamicVpState vp_state
;
885 struct anv_render_pass_layer
{
886 VkAttachmentLoadOp color_load_op
;
887 VkClearColor clear_color
;
890 struct anv_render_pass
{
893 uint32_t num_clear_layers
;
895 struct anv_render_pass_layer layers
[0];
898 void anv_device_init_meta(struct anv_device
*device
);
899 void anv_device_finish_meta(struct anv_device
*device
);
902 anv_cmd_buffer_clear(struct anv_cmd_buffer
*cmd_buffer
,
903 struct anv_render_pass
*pass
);
906 anv_lookup_entrypoint(const char *name
);