X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_private.h;h=3d1ed0fe5f45b002e6d9c989cdb046bec620867d;hp=6006df36cdd0d68d798711033920f26fc3b0d37d;hb=add2b44ab69719e3f01184980a05559883bc44bf;hpb=d37deebde57138c00f244f2bba2f82b4a0a7d958 diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 6006df36cdd..3d1ed0fe5f4 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -47,11 +47,12 @@ #include "main/macros.h" #include "util/list.h" #include "util/macros.h" +#include "util/u_atomic.h" #include "vk_alloc.h" +#include "vk_object.h" #include "vk_debug_report.h" #include "wsi_common.h" -#include "drm-uapi/msm_drm.h" #include "ir3/ir3_compiler.h" #include "ir3/ir3_shader.h" @@ -62,6 +63,7 @@ #include "tu_descriptor_set.h" #include "tu_extensions.h" +#include "tu_util.h" /* Pre-declarations needed for WSI entrypoints */ struct wl_surface; @@ -129,6 +131,7 @@ tu_minify(uint32_t n, uint32_t levels) }) #define COND(bool, val) ((bool) ? (val) : 0) +#define BIT(bit) (1u << (bit)) /* Whenever we generate an error, pass it through this function. Useful for * debugging, where we can break on it. Only call at error site, not when @@ -170,13 +173,6 @@ tu_logi(const char *format, ...) tu_printflike(1, 2); } \ } while (0) -/* Suppress -Wunused in stub functions */ -#define tu_use_args(...) __tu_use_args(0, ##__VA_ARGS__) -static inline void -__tu_use_args(int ignore, ...) -{ -} - #define tu_stub() \ do { \ tu_finishme("stub %s", __func__); \ @@ -193,7 +189,7 @@ tu_lookup_entrypoint_checked( struct tu_physical_device { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; struct tu_instance *instance; @@ -225,6 +221,11 @@ struct tu_physical_device uint32_t SP_UNKNOWN_A0F8; } magic; + int msm_major_version; + int msm_minor_version; + + bool limited_z24s8; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -246,7 +247,7 @@ enum tu_debug_flags struct tu_instance { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; VkAllocationCallbacks alloc; @@ -278,6 +279,8 @@ struct cache_entry; struct tu_pipeline_cache { + struct vk_object_base base; + struct tu_device *device; pthread_mutex_t mutex; @@ -302,6 +305,7 @@ struct tu_pipeline_key struct tu_fence { + struct vk_object_base base; struct wsi_fence *fence_wsi; bool signaled; int fd; @@ -322,7 +326,8 @@ tu_fence_wait_idle(struct tu_fence *fence); struct tu_queue { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; + struct tu_device *device; uint32_t queue_family_index; int queue_idx; @@ -340,29 +345,61 @@ struct tu_bo void *map; }; -struct tu_device +enum global_shader { + GLOBAL_SH_VS, + GLOBAL_SH_FS_BLIT, + GLOBAL_SH_FS_CLEAR0, + GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS, + GLOBAL_SH_COUNT, +}; + +/* This struct defines the layout of the global_bo */ +struct tu6_global { - VK_LOADER_DATA _loader_data; + /* 6 bcolor_entry entries, one for each VK_BORDER_COLOR */ + uint8_t border_color[128 * 6]; - VkAllocationCallbacks alloc; + /* clear/blit shaders, all <= 16 instrs (16 instr = 1 instrlen unit) */ + instr_t shaders[GLOBAL_SH_COUNT][16]; + + uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */ + uint32_t _pad0; + volatile uint32_t vsc_draw_overflow; + uint32_t _pad1; + volatile uint32_t vsc_prim_overflow; + uint32_t _pad2; + uint64_t predicate; + /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ + struct { + uint32_t offset; + uint32_t pad[7]; + } flush_base[4]; +}; +#define gb_offset(member) offsetof(struct tu6_global, member) +#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member)) + +void tu_init_clear_blit_shaders(struct tu6_global *global); + +/* extra space in vsc draw/prim streams */ +#define VSC_PAD 0x40 + +struct tu_device +{ + struct vk_device vk; struct tu_instance *instance; struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES]; int queue_count[TU_MAX_QUEUE_FAMILIES]; struct tu_physical_device *physical_device; + int _lost; struct ir3_compiler *compiler; /* Backup in-memory cache to be used if the app doesn't provide one */ struct tu_pipeline_cache *mem_cache; - struct tu_bo vsc_draw_strm; - struct tu_bo vsc_prim_strm; - uint32_t vsc_draw_strm_pitch; - uint32_t vsc_prim_strm_pitch; - #define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */ /* Currently the kernel driver uses a 32-bit GPU address space, but it @@ -374,11 +411,27 @@ struct tu_device bool initialized; } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2]; - struct tu_bo border_color; + struct tu_bo global_bo; struct tu_device_extension_table enabled_extensions; + + uint32_t vsc_draw_strm_pitch; + uint32_t vsc_prim_strm_pitch; + mtx_t vsc_pitch_mtx; }; +VkResult _tu_device_set_lost(struct tu_device *device, + const char *file, int line, + const char *msg, ...) PRINTFLIKE(4, 5); +#define tu_device_set_lost(dev, ...) \ + _tu_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__) + +static inline bool +tu_device_is_lost(struct tu_device *device) +{ + return unlikely(p_atomic_read(&device->_lost)); +} + VkResult tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size); VkResult @@ -411,11 +464,50 @@ struct tu_cs_entry uint32_t offset; }; -struct ts_cs_memory { +struct tu_cs_memory { uint32_t *map; uint64_t iova; }; +struct tu_draw_state { + uint64_t iova : 48; + uint32_t size : 16; +}; + +enum tu_dynamic_state +{ + /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */ + TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1, + TU_DYNAMIC_STATE_COUNT, +}; + +enum tu_draw_state_group_id +{ + TU_DRAW_STATE_PROGRAM, + TU_DRAW_STATE_PROGRAM_BINNING, + TU_DRAW_STATE_TESS, + TU_DRAW_STATE_VB, + TU_DRAW_STATE_VI, + TU_DRAW_STATE_VI_BINNING, + TU_DRAW_STATE_RAST, + TU_DRAW_STATE_DS, + TU_DRAW_STATE_BLEND, + TU_DRAW_STATE_VS_CONST, + TU_DRAW_STATE_HS_CONST, + TU_DRAW_STATE_DS_CONST, + TU_DRAW_STATE_GS_CONST, + TU_DRAW_STATE_FS_CONST, + TU_DRAW_STATE_DESC_SETS, + TU_DRAW_STATE_DESC_SETS_LOAD, + TU_DRAW_STATE_VS_PARAMS, + TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM, + TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM, + + /* dynamic state related draw states */ + TU_DRAW_STATE_DYNAMIC, + TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT, +}; + enum tu_cs_mode { @@ -475,6 +567,8 @@ struct tu_cs struct tu_device_memory { + struct vk_object_base base; + struct tu_bo bo; VkDeviceSize size; @@ -495,6 +589,8 @@ struct tu_descriptor_range struct tu_descriptor_set { + struct vk_object_base base; + const struct tu_descriptor_set_layout *layout; struct tu_descriptor_pool *pool; uint32_t size; @@ -522,6 +618,8 @@ struct tu_descriptor_pool_entry struct tu_descriptor_pool { + struct vk_object_base base; + struct tu_bo bo; uint64_t current_offset; uint64_t size; @@ -564,12 +662,16 @@ struct tu_descriptor_update_template_entry struct tu_descriptor_update_template { + struct vk_object_base base; + uint32_t entry_count; struct tu_descriptor_update_template_entry entry[0]; }; struct tu_buffer { + struct vk_object_base base; + VkDeviceSize size; VkBufferUsageFlags usage; @@ -585,98 +687,12 @@ tu_buffer_iova(struct tu_buffer *buffer) return buffer->bo->iova + buffer->bo_offset; } -enum tu_dynamic_state_bits -{ - TU_DYNAMIC_VIEWPORT = 1 << 0, - TU_DYNAMIC_SCISSOR = 1 << 1, - TU_DYNAMIC_LINE_WIDTH = 1 << 2, - TU_DYNAMIC_DEPTH_BIAS = 1 << 3, - TU_DYNAMIC_BLEND_CONSTANTS = 1 << 4, - TU_DYNAMIC_DEPTH_BOUNDS = 1 << 5, - TU_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, - TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, - TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8, - TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, - TU_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10, - TU_DYNAMIC_ALL = (1 << 11) - 1, -}; - struct tu_vertex_binding { struct tu_buffer *buffer; VkDeviceSize offset; }; -struct tu_viewport_state -{ - uint32_t count; - VkViewport viewports[MAX_VIEWPORTS]; -}; - -struct tu_scissor_state -{ - uint32_t count; - VkRect2D scissors[MAX_SCISSORS]; -}; - -struct tu_discard_rectangle_state -{ - uint32_t count; - VkRect2D rectangles[MAX_DISCARD_RECTANGLES]; -}; - -struct tu_dynamic_state -{ - /** - * Bitmask of (1 << VK_DYNAMIC_STATE_*). - * Defines the set of saved dynamic state. - */ - uint32_t mask; - - struct tu_viewport_state viewport; - - struct tu_scissor_state scissor; - - float line_width; - - struct - { - float bias; - float clamp; - float slope; - } depth_bias; - - float blend_constants[4]; - - struct - { - float min; - float max; - } depth_bounds; - - struct - { - uint32_t front; - uint32_t back; - } stencil_compare_mask; - - struct - { - uint32_t front; - uint32_t back; - } stencil_write_mask; - - struct - { - uint32_t front; - uint32_t back; - } stencil_reference; - - struct tu_discard_rectangle_state discard_rectangle; -}; - -extern const struct tu_dynamic_state default_dynamic_state; - const char * tu_get_debug_option_name(int id); @@ -686,69 +702,17 @@ tu_get_perftest_option_name(int id); struct tu_descriptor_state { struct tu_descriptor_set *sets[MAX_SETS]; - uint32_t valid; - struct tu_push_descriptor_set push_set; - bool push_dirty; uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS]; - uint32_t input_attachments[MAX_RTS * A6XX_TEX_CONST_DWORDS]; -}; - -struct tu_tile -{ - uint8_t pipe; - uint8_t slot; - VkOffset2D begin; - VkOffset2D end; -}; - -struct tu_tiling_config -{ - VkRect2D render_area; - - /* position and size of the first tile */ - VkRect2D tile0; - /* number of tiles */ - VkExtent2D tile_count; - - /* size of the first VSC pipe */ - VkExtent2D pipe0; - /* number of VSC pipes */ - VkExtent2D pipe_count; - - /* pipe register values */ - uint32_t pipe_config[MAX_VSC_PIPES]; - uint32_t pipe_sizes[MAX_VSC_PIPES]; - - /* Whether sysmem rendering must be used */ - bool force_sysmem; }; enum tu_cmd_dirty_bits { - TU_CMD_DIRTY_PIPELINE = 1 << 0, - TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1, TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2, - - TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3, - TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4, - TU_CMD_DIRTY_PUSH_CONSTANTS = 1 << 5, - TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6, - TU_CMD_DIRTY_INPUT_ATTACHMENTS = 1 << 7, - - TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16, - TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17, - TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 18, - TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19, - TU_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 20, - TU_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 21, -}; - -struct tu_streamout_state { - uint16_t stride[IR3_MAX_SO_BUFFERS]; - uint32_t ncomp[IR3_MAX_SO_BUFFERS]; - uint32_t prog[IR3_MAX_SO_OUTPUTS * 2]; - uint32_t prog_count; - uint32_t vpc_so_buf_cntl; + TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3, + TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = 1 << 4, + TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5, + /* all draw states were disabled and need to be re-enabled: */ + TU_CMD_DIRTY_DRAW_STATE = 1 << 7, }; /* There are only three cache domains we have to care about: the CCU, or @@ -790,13 +754,34 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8, TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9, - TU_ACCESS_SYSMEM_READ = 1 << 10, - TU_ACCESS_SYSMEM_WRITE = 1 << 11, + /* Accesses by the host */ + TU_ACCESS_HOST_READ = 1 << 10, + TU_ACCESS_HOST_WRITE = 1 << 11, + + /* Accesses by a GPU engine which bypasses any cache. e.g. writes via + * CP_EVENT_WRITE::BLIT and the CP are SYSMEM_WRITE. + */ + TU_ACCESS_SYSMEM_READ = 1 << 12, + TU_ACCESS_SYSMEM_WRITE = 1 << 13, + + /* Set if a WFI is required. This can be required for: + * - 2D engine which (on some models) doesn't wait for flushes to complete + * before starting + * - CP draw indirect opcodes, where we need to wait for any flushes to + * complete but the CP implicitly waits for WFI's to complete and + * therefore we only need a WFI after the flushes. + */ + TU_ACCESS_WFI_READ = 1 << 14, - /* Set if a WFI is required due to data being read by the CP or the 2D - * engine. + /* Set if a CP_WAIT_FOR_ME is required due to the data being read by the CP + * without it waiting for any WFI. */ - TU_ACCESS_WFI_READ = 1 << 12, + TU_ACCESS_WFM_READ = 1 << 15, + + /* Memory writes from the CP start in-order with draws and event writes, + * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read. + */ + TU_ACCESS_CP_WRITE = 1 << 16, TU_ACCESS_READ = TU_ACCESS_UCHE_READ | @@ -804,7 +789,10 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_DEPTH_READ | TU_ACCESS_CCU_COLOR_INCOHERENT_READ | TU_ACCESS_CCU_DEPTH_INCOHERENT_READ | - TU_ACCESS_SYSMEM_READ, + TU_ACCESS_HOST_READ | + TU_ACCESS_SYSMEM_READ | + TU_ACCESS_WFI_READ | + TU_ACCESS_WFM_READ, TU_ACCESS_WRITE = TU_ACCESS_UCHE_WRITE | @@ -812,7 +800,9 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE | TU_ACCESS_CCU_DEPTH_WRITE | TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE | - TU_ACCESS_SYSMEM_WRITE, + TU_ACCESS_HOST_WRITE | + TU_ACCESS_SYSMEM_WRITE | + TU_ACCESS_CP_WRITE, TU_ACCESS_ALL = TU_ACCESS_READ | @@ -826,18 +816,31 @@ enum tu_cmd_flush_bits { TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3, TU_CMD_FLAG_CACHE_FLUSH = 1 << 4, TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5, + TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6, + TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7, + TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8, TU_CMD_FLAG_ALL_FLUSH = TU_CMD_FLAG_CCU_FLUSH_DEPTH | TU_CMD_FLAG_CCU_FLUSH_COLOR | - TU_CMD_FLAG_CACHE_FLUSH, + TU_CMD_FLAG_CACHE_FLUSH | + /* Treat the CP as a sort of "cache" which may need to be "flushed" via + * waiting for writes to land with WAIT_FOR_MEM_WRITES. + */ + TU_CMD_FLAG_WAIT_MEM_WRITES, - TU_CMD_FLAG_ALL_INVALIDATE = + TU_CMD_FLAG_GPU_INVALIDATE = TU_CMD_FLAG_CCU_INVALIDATE_DEPTH | TU_CMD_FLAG_CCU_INVALIDATE_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE, - TU_CMD_FLAG_WFI = 1 << 6, + TU_CMD_FLAG_ALL_INVALIDATE = + TU_CMD_FLAG_GPU_INVALIDATE | + /* Treat the CP as a sort of "cache" which may need to be "invalidated" + * via waiting for UCHE/CCU flushes to land with WFI/WFM. + */ + TU_CMD_FLAG_WAIT_FOR_IDLE | + TU_CMD_FLAG_WAIT_FOR_ME, }; /* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty @@ -875,25 +878,30 @@ struct tu_cmd_state VkDeviceSize offsets[MAX_VBS]; } vb; - struct tu_dynamic_state dynamic; + /* for dynamic states that can't be emitted directly */ + uint32_t dynamic_stencil_mask; + uint32_t dynamic_stencil_wrmask; + uint32_t dynamic_stencil_ref; + uint32_t dynamic_gras_su_cntl; - /* Stream output buffers */ - struct - { - struct tu_buffer *buffers[IR3_MAX_SO_BUFFERS]; - VkDeviceSize offsets[IR3_MAX_SO_BUFFERS]; - VkDeviceSize sizes[IR3_MAX_SO_BUFFERS]; - } streamout_buf; + /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */ + struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; + struct tu_draw_state vertex_buffers; + struct tu_draw_state shader_const[MESA_SHADER_STAGES]; + struct tu_draw_state desc_sets; - uint8_t streamout_reset; - uint8_t streamout_enabled; + struct tu_draw_state vs_params; /* Index buffer */ - struct tu_buffer *index_buffer; - uint64_t index_offset; - uint32_t index_type; - uint32_t max_index_count; uint64_t index_va; + uint32_t max_index_count; + uint8_t index_size; + + /* because streamout base has to be 32-byte aligned + * there is an extra offset to deal with when it is + * unaligned + */ + uint8_t streamout_offset[IR3_MAX_SO_BUFFERS]; /* Renderpasses are tricky, because we may need to flush differently if * using sysmem vs. gmem and therefore we have to delay any flushing that @@ -909,14 +917,20 @@ struct tu_cmd_state const struct tu_render_pass *pass; const struct tu_subpass *subpass; const struct tu_framebuffer *framebuffer; - - struct tu_tiling_config tiling_config; + VkRect2D render_area; struct tu_cs_entry tile_store_ib; + + bool xfb_used; + bool has_tess; + bool has_subpass_predication; + bool predication_active; }; struct tu_cmd_pool { + struct vk_object_base base; + VkAllocationCallbacks alloc; struct list_head cmd_buffers; struct list_head free_cmd_buffers; @@ -940,6 +954,18 @@ enum tu_cmd_buffer_status TU_CMD_BUFFER_STATUS_PENDING, }; +#ifndef MSM_SUBMIT_BO_READ +#define MSM_SUBMIT_BO_READ 0x0001 +#define MSM_SUBMIT_BO_WRITE 0x0002 +#define MSM_SUBMIT_BO_DUMP 0x0004 + +struct drm_msm_gem_submit_bo { + uint32_t flags; /* in, mask of MSM_SUBMIT_BO_x */ + uint32_t handle; /* in, GEM handle */ + uint64_t presumed; /* in/out, presumed buffer address */ +}; +#endif + struct tu_bo_list { uint32_t count; @@ -962,31 +988,9 @@ tu_bo_list_add(struct tu_bo_list *list, VkResult tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other); -/* This struct defines the layout of the scratch_bo */ -struct tu6_control -{ - uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */ - uint32_t _pad0; - volatile uint32_t vsc_overflow; - uint32_t _pad1; - /* flag set from cmdstream when VSC overflow detected: */ - uint32_t vsc_scratch; - uint32_t _pad2; - uint32_t _pad3; - uint32_t _pad4; - - /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ - struct { - uint32_t offset; - uint32_t pad[7]; - } flush_base[4]; -}; - -#define ctrl_offset(member) offsetof(struct tu6_control, member) - struct tu_cmd_buffer { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; struct tu_device *device; @@ -1018,13 +1022,8 @@ struct tu_cmd_buffer struct tu_cs draw_epilogue_cs; struct tu_cs sub_cs; - struct tu_bo scratch_bo; - - struct tu_bo vsc_draw_strm; - struct tu_bo vsc_prim_strm; uint32_t vsc_draw_strm_pitch; uint32_t vsc_prim_strm_pitch; - bool use_vsc_data; }; /* Temporary struct for tracking a register state to be written, used by @@ -1062,50 +1061,20 @@ tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer, struct tu_event { + struct vk_object_base base; struct tu_bo bo; }; -static inline gl_shader_stage -vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) -{ - assert(__builtin_popcount(vk_stage) == 1); - return ffs(vk_stage) - 1; -} - -static inline VkShaderStageFlagBits -mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) -{ - return (1 << mesa_stage); -} - -#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) - -#define tu_foreach_stage(stage, stage_bits) \ - for (gl_shader_stage stage, \ - __tmp = (gl_shader_stage)((stage_bits) &TU_STAGE_MASK); \ - stage = __builtin_ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage))) - -uint32_t -tu6_stage2opcode(gl_shader_stage type); -enum a6xx_state_block -tu6_stage2shadersb(gl_shader_stage type); - struct tu_shader_module { + struct vk_object_base base; + unsigned char sha1[20]; uint32_t code_size; const uint32_t *code[0]; }; -struct tu_shader_compile_options -{ - struct ir3_shader_key key; - - bool optimize; - bool include_binning_pass; -}; - struct tu_push_constant_range { uint32_t lo; @@ -1114,21 +1083,10 @@ struct tu_push_constant_range struct tu_shader { - struct ir3_shader ir3_shader; + struct ir3_shader *ir3_shader; struct tu_push_constant_range push_consts; - unsigned attachment_idx[MAX_RTS]; uint8_t active_desc_sets; - - /* This may be true for vertex shaders. When true, variants[1] is the - * binning variant and binning_binary is non-NULL. - */ - bool has_binning_pass; - - void *binary; - void *binning_binary; - - struct ir3_shader_variant variants[0]; }; struct tu_shader * @@ -1143,21 +1101,8 @@ tu_shader_destroy(struct tu_device *dev, struct tu_shader *shader, const VkAllocationCallbacks *alloc); -void -tu_shader_compile_options_init( - struct tu_shader_compile_options *options, - const VkGraphicsPipelineCreateInfo *pipeline_info); - -VkResult -tu_shader_compile(struct tu_device *dev, - struct tu_shader *shader, - const struct tu_shader *next_stage, - const struct tu_shader_compile_options *options, - const VkAllocationCallbacks *alloc); - struct tu_program_descriptor_linkage { - struct ir3_ubo_analysis_state ubo_state; struct ir3_const_state const_state; uint32_t constlen; @@ -1167,9 +1112,9 @@ struct tu_program_descriptor_linkage struct tu_pipeline { - struct tu_cs cs; + struct vk_object_base base; - struct tu_dynamic_state dynamic_state; + struct tu_cs cs; struct tu_pipeline_layout *layout; @@ -1177,27 +1122,30 @@ struct tu_pipeline VkShaderStageFlags active_stages; uint32_t active_desc_sets; - struct tu_streamout_state streamout; + /* mask of enabled dynamic states + * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used + */ + uint32_t dynamic_state_mask; + struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; + + /* gras_su_cntl without line width, used for dynamic line width state */ + uint32_t gras_su_cntl; + + /* draw states for the pipeline */ + struct tu_draw_state load_state, rast_state, ds_state, blend_state; struct { - struct tu_bo binary_bo; - struct tu_cs_entry state_ib; - struct tu_cs_entry binning_state_ib; + struct tu_draw_state state; + struct tu_draw_state binning_state; struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES]; - unsigned input_attachment_idx[MAX_RTS]; } program; struct { - struct tu_cs_entry state_ib; - } load_state; - - struct - { - struct tu_cs_entry state_ib; - struct tu_cs_entry binning_state_ib; + struct tu_draw_state state; + struct tu_draw_state binning_state; uint32_t bindings_used; } vi; @@ -1209,24 +1157,12 @@ struct tu_pipeline struct { - struct tu_cs_entry state_ib; - } vp; - - struct - { - uint32_t gras_su_cntl; - struct tu_cs_entry state_ib; - } rast; - - struct - { - struct tu_cs_entry state_ib; - } ds; - - struct - { - struct tu_cs_entry state_ib; - } blend; + uint32_t patch_type; + uint32_t param_stride; + uint32_t hs_bo_regid; + uint32_t ds_bo_regid; + bool upper_left_domain_origin; + } tess; struct { @@ -1243,31 +1179,12 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor); void tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc); -void -tu6_emit_gras_su_cntl(struct tu_cs *cs, - uint32_t gras_su_cntl, - float line_width); - void tu6_emit_depth_bias(struct tu_cs *cs, float constant_factor, float clamp, float slope_factor); -void -tu6_emit_stencil_compare_mask(struct tu_cs *cs, - uint32_t front, - uint32_t back); - -void -tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back); - -void -tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back); - -void -tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]); - void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples); void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2); @@ -1283,9 +1200,12 @@ tu6_emit_xs_config(struct tu_cs *cs, void tu6_emit_vpc(struct tu_cs *cs, const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *hs, + const struct ir3_shader_variant *ds, const struct ir3_shader_variant *gs, const struct ir3_shader_variant *fs, - struct tu_streamout_state *tf); + uint32_t patch_control_points, + bool vshs_workgroup); void tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs); @@ -1354,10 +1274,10 @@ tu6_base_format(VkFormat format) return tu6_format_color(format, TILE6_LINEAR).fmt; } -enum a6xx_depth_format tu6_pipe2depth(VkFormat format); - struct tu_image { + struct vk_object_base base; + VkImageType type; /* The original VkFormat provided by the client. This may not match any * of the actual surface formats. @@ -1372,7 +1292,8 @@ struct tu_image uint32_t layer_count; VkSampleCountFlagBits samples; - struct fdl_layout layout; + struct fdl_layout layout[3]; + uint32_t total_size; unsigned queue_family_mask; bool exclusive; @@ -1404,13 +1325,10 @@ tu_get_levelCount(const struct tu_image *image, : range->levelCount; } -enum a3xx_msaa_samples -tu_msaa_samples(uint32_t samples); -enum a6xx_tex_fetchsize -tu6_fetchsize(VkFormat format); - struct tu_image_view { + struct vk_object_base base; + struct tu_image *image; /**< VkImageViewCreateInfo::image */ uint64_t base_addr; @@ -1444,9 +1362,16 @@ struct tu_image_view uint32_t RB_2D_DST_INFO; uint32_t RB_BLIT_DST_INFO; + + /* for d32s8 separate stencil */ + uint64_t stencil_base_addr; + uint32_t stencil_layer_size; + uint32_t stencil_PITCH; }; struct tu_sampler_ycbcr_conversion { + struct vk_object_base base; + VkFormat format; VkSamplerYcbcrModelConversion ycbcr_model; VkSamplerYcbcrRange ycbcr_range; @@ -1456,6 +1381,8 @@ struct tu_sampler_ycbcr_conversion { }; struct tu_sampler { + struct vk_object_base base; + uint32_t descriptor[A6XX_TEX_SAMP_DWORDS]; struct tu_sampler_ycbcr_conversion *ycbcr_sampler; }; @@ -1469,8 +1396,11 @@ tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t void tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); -enum a6xx_tex_filter -tu6_tex_filter(VkFilter filter, unsigned aniso); +void +tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); + +#define tu_image_view_stencil(iview, x) \ + ((iview->x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT)) VkResult tu_image_create(VkDevice _device, @@ -1488,11 +1418,14 @@ tu_image_from_gralloc(VkDevice device_h, VkImage *out_image_h); void -tu_image_view_init(struct tu_image_view *view, - const VkImageViewCreateInfo *pCreateInfo); +tu_image_view_init(struct tu_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + bool limited_z24s8); struct tu_buffer_view { + struct vk_object_base base; + uint32_t descriptor[A6XX_TEX_CONST_DWORDS]; struct tu_buffer *buffer; @@ -1509,14 +1442,35 @@ struct tu_attachment_info struct tu_framebuffer { + struct vk_object_base base; + uint32_t width; uint32_t height; uint32_t layers; + /* size of the first tile */ + VkExtent2D tile0; + /* number of tiles */ + VkExtent2D tile_count; + + /* size of the first VSC pipe */ + VkExtent2D pipe0; + /* number of VSC pipes */ + VkExtent2D pipe_count; + + /* pipe register values */ + uint32_t pipe_config[MAX_VSC_PIPES]; + uint32_t pipe_sizes[MAX_VSC_PIPES]; + uint32_t attachment_count; struct tu_attachment_info attachments[0]; }; +void +tu_framebuffer_tiling_config(struct tu_framebuffer *fb, + const struct tu_device *device, + const struct tu_render_pass *pass); + struct tu_subpass_barrier { VkPipelineStageFlags src_stage_mask; VkAccessFlags src_access_mask; @@ -1527,7 +1481,6 @@ struct tu_subpass_barrier { struct tu_subpass_attachment { uint32_t attachment; - VkImageLayout layout; }; struct tu_subpass @@ -1540,7 +1493,6 @@ struct tu_subpass struct tu_subpass_attachment depth_stencil_attachment; VkSampleCountFlagBits samples; - bool has_external_src, has_external_dst; uint32_t srgb_cntl; @@ -1555,12 +1507,17 @@ struct tu_render_pass_attachment VkImageAspectFlags clear_mask; bool load; bool store; - VkImageLayout initial_layout, final_layout; int32_t gmem_offset; + /* for D32S8 separate stencil: */ + bool load_stencil; + bool store_stencil; + int32_t gmem_offset_stencil; }; struct tu_render_pass { + struct vk_object_base base; + uint32_t attachment_count; uint32_t subpass_count; uint32_t gmem_pixels; @@ -1573,6 +1530,8 @@ struct tu_render_pass struct tu_query_pool { + struct vk_object_base base; + VkQueryType type; uint32_t stride; uint64_t size; @@ -1580,10 +1539,26 @@ struct tu_query_pool struct tu_bo bo; }; +enum tu_semaphore_kind +{ + TU_SEMAPHORE_NONE, + TU_SEMAPHORE_SYNCOBJ, +}; + +struct tu_semaphore_part +{ + enum tu_semaphore_kind kind; + union { + uint32_t syncobj; + }; +}; + struct tu_semaphore { - uint32_t syncobj; - uint32_t temp_syncobj; + struct vk_object_base base; + + struct tu_semaphore_part permanent; + struct tu_semaphore_part temporary; }; void @@ -1609,14 +1584,11 @@ tu_update_descriptor_set_with_template( VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); -int -tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id); - -int -tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size); - -int -tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base); +VkResult +tu_physical_device_init(struct tu_physical_device *device, + struct tu_instance *instance); +VkResult +tu_enumerate_devices(struct tu_instance *instance); int tu_drm_submitqueue_new(const struct tu_device *dev, @@ -1626,21 +1598,6 @@ tu_drm_submitqueue_new(const struct tu_device *dev, void tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id); -uint32_t -tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags); -uint32_t -tu_gem_import_dmabuf(const struct tu_device *dev, - int prime_fd, - uint64_t size); -int -tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle); -void -tu_gem_close(const struct tu_device *dev, uint32_t gem_handle); -uint64_t -tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle); -uint64_t -tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle); - #define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \ \ static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \