X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_private.h;h=7470d69b868a0a2b7224e236f2b465826d6f19ee;hb=76f711d09d2f8c9b3bcd9f8c1694e553a486ac1f;hp=5e50b983302ea69c1d07f618f87c7ca1930ac28a;hpb=f745ceecee6058f488d0ac44a5f2d97b8798e00d;p=mesa.git diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 5e50b983302..7470d69b868 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -47,11 +47,12 @@ #include "main/macros.h" #include "util/list.h" #include "util/macros.h" +#include "util/u_atomic.h" #include "vk_alloc.h" +#include "vk_object.h" #include "vk_debug_report.h" #include "wsi_common.h" -#include "drm-uapi/msm_drm.h" #include "ir3/ir3_compiler.h" #include "ir3/ir3_shader.h" @@ -62,6 +63,7 @@ #include "tu_descriptor_set.h" #include "tu_extensions.h" +#include "tu_util.h" /* Pre-declarations needed for WSI entrypoints */ struct wl_surface; @@ -109,25 +111,10 @@ typedef uint32_t xcb_window_t; #define tu_printflike(a, b) __attribute__((__format__(__printf__, a, b))) -static inline uint32_t -tu_minify(uint32_t n, uint32_t levels) -{ - if (unlikely(n == 0)) - return 0; - else - return MAX2(n >> levels, 1); -} - #define for_each_bit(b, dword) \ for (uint32_t __dword = (dword); \ (b) = __builtin_ffs(__dword) - 1, __dword; __dword &= ~(1 << (b))) -#define typed_memcpy(dest, src, count) \ - ({ \ - STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \ - memcpy((dest), (src), (count) * sizeof(*(src))); \ - }) - #define COND(bool, val) ((bool) ? (val) : 0) #define BIT(bit) (1u << (bit)) @@ -187,7 +174,7 @@ tu_lookup_entrypoint_checked( struct tu_physical_device { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; struct tu_instance *instance; @@ -219,6 +206,11 @@ struct tu_physical_device uint32_t SP_UNKNOWN_A0F8; } magic; + int msm_major_version; + int msm_minor_version; + + bool limited_z24s8; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -240,7 +232,7 @@ enum tu_debug_flags struct tu_instance { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; VkAllocationCallbacks alloc; @@ -272,6 +264,8 @@ struct cache_entry; struct tu_pipeline_cache { + struct vk_object_base base; + struct tu_device *device; pthread_mutex_t mutex; @@ -296,6 +290,7 @@ struct tu_pipeline_key struct tu_fence { + struct vk_object_base base; struct wsi_fence *fence_wsi; bool signaled; int fd; @@ -316,7 +311,8 @@ tu_fence_wait_idle(struct tu_fence *fence); struct tu_queue { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; + struct tu_device *device; uint32_t queue_family_index; int queue_idx; @@ -334,29 +330,64 @@ struct tu_bo void *map; }; -struct tu_device +enum global_shader { + GLOBAL_SH_VS, + GLOBAL_SH_FS_BLIT, + GLOBAL_SH_FS_CLEAR0, + GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS, + GLOBAL_SH_COUNT, +}; + +#define TU_BORDER_COLOR_COUNT 4096 +#define TU_BORDER_COLOR_BUILTIN 6 + +/* This struct defines the layout of the global_bo */ +struct tu6_global { - VK_LOADER_DATA _loader_data; + /* clear/blit shaders, all <= 16 instrs (16 instr = 1 instrlen unit) */ + instr_t shaders[GLOBAL_SH_COUNT][16]; - VkAllocationCallbacks alloc; + uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */ + uint32_t _pad0; + volatile uint32_t vsc_draw_overflow; + uint32_t _pad1; + volatile uint32_t vsc_prim_overflow; + uint32_t _pad2; + uint64_t predicate; + + /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ + struct { + uint32_t offset; + uint32_t pad[7]; + } flush_base[4]; + + /* note: larger global bo will be used for customBorderColors */ + struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[]; +}; +#define gb_offset(member) offsetof(struct tu6_global, member) +#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member)) + +void tu_init_clear_blit_shaders(struct tu6_global *global); + +/* extra space in vsc draw/prim streams */ +#define VSC_PAD 0x40 +struct tu_device +{ + struct vk_device vk; struct tu_instance *instance; struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES]; int queue_count[TU_MAX_QUEUE_FAMILIES]; struct tu_physical_device *physical_device; + int _lost; struct ir3_compiler *compiler; /* Backup in-memory cache to be used if the app doesn't provide one */ struct tu_pipeline_cache *mem_cache; - struct tu_bo vsc_draw_strm; - struct tu_bo vsc_prim_strm; - uint32_t vsc_draw_strm_pitch; - uint32_t vsc_prim_strm_pitch; - #define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */ /* Currently the kernel driver uses a 32-bit GPU address space, but it @@ -368,11 +399,28 @@ struct tu_device bool initialized; } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2]; - struct tu_bo border_color; + struct tu_bo global_bo; struct tu_device_extension_table enabled_extensions; + + uint32_t vsc_draw_strm_pitch; + uint32_t vsc_prim_strm_pitch; + BITSET_DECLARE(custom_border_color, TU_BORDER_COLOR_COUNT); + mtx_t mutex; }; +VkResult _tu_device_set_lost(struct tu_device *device, + const char *file, int line, + const char *msg, ...) PRINTFLIKE(4, 5); +#define tu_device_set_lost(dev, ...) \ + _tu_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__) + +static inline bool +tu_device_is_lost(struct tu_device *device) +{ + return unlikely(p_atomic_read(&device->_lost)); +} + VkResult tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size); VkResult @@ -405,7 +453,7 @@ struct tu_cs_entry uint32_t offset; }; -struct ts_cs_memory { +struct tu_cs_memory { uint32_t *map; uint64_t iova; }; @@ -426,6 +474,7 @@ enum tu_draw_state_group_id { TU_DRAW_STATE_PROGRAM, TU_DRAW_STATE_PROGRAM_BINNING, + TU_DRAW_STATE_TESS, TU_DRAW_STATE_VB, TU_DRAW_STATE_VI, TU_DRAW_STATE_VI_BINNING, @@ -433,6 +482,8 @@ enum tu_draw_state_group_id TU_DRAW_STATE_DS, TU_DRAW_STATE_BLEND, TU_DRAW_STATE_VS_CONST, + TU_DRAW_STATE_HS_CONST, + TU_DRAW_STATE_DS_CONST, TU_DRAW_STATE_GS_CONST, TU_DRAW_STATE_FS_CONST, TU_DRAW_STATE_DESC_SETS, @@ -505,6 +556,8 @@ struct tu_cs struct tu_device_memory { + struct vk_object_base base; + struct tu_bo bo; VkDeviceSize size; @@ -525,6 +578,8 @@ struct tu_descriptor_range struct tu_descriptor_set { + struct vk_object_base base; + const struct tu_descriptor_set_layout *layout; struct tu_descriptor_pool *pool; uint32_t size; @@ -552,6 +607,8 @@ struct tu_descriptor_pool_entry struct tu_descriptor_pool { + struct vk_object_base base; + struct tu_bo bo; uint64_t current_offset; uint64_t size; @@ -594,12 +651,16 @@ struct tu_descriptor_update_template_entry struct tu_descriptor_update_template { + struct vk_object_base base; + uint32_t entry_count; struct tu_descriptor_update_template_entry entry[0]; }; struct tu_buffer { + struct vk_object_base base; + VkDeviceSize size; VkBufferUsageFlags usage; @@ -633,56 +694,16 @@ struct tu_descriptor_state uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS]; }; -struct tu_tile -{ - uint8_t pipe; - uint8_t slot; - VkOffset2D begin; - VkOffset2D end; -}; - -struct tu_tiling_config -{ - VkRect2D render_area; - - /* position and size of the first tile */ - VkRect2D tile0; - /* number of tiles */ - VkExtent2D tile_count; - - /* size of the first VSC pipe */ - VkExtent2D pipe0; - /* number of VSC pipes */ - VkExtent2D pipe_count; - - /* pipe register values */ - uint32_t pipe_config[MAX_VSC_PIPES]; - uint32_t pipe_sizes[MAX_VSC_PIPES]; - - /* Whether sysmem rendering must be used */ - bool force_sysmem; -}; - enum tu_cmd_dirty_bits { - TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1, TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2, - TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3, - TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4, + TU_CMD_DIRTY_DESC_SETS_LOAD = 1 << 3, + TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = 1 << 4, TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5, - TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6, /* all draw states were disabled and need to be re-enabled: */ TU_CMD_DIRTY_DRAW_STATE = 1 << 7, }; -struct tu_streamout_state { - uint16_t stride[IR3_MAX_SO_BUFFERS]; - uint32_t ncomp[IR3_MAX_SO_BUFFERS]; - uint32_t prog[IR3_MAX_SO_OUTPUTS * 2]; - uint32_t prog_count; - uint32_t vpc_so_buf_cntl; -}; - /* There are only three cache domains we have to care about: the CCU, or * color cache unit, which is used for color and depth/stencil attachments * and copy/blit destinations, and is split conceptually into color and depth, @@ -722,13 +743,34 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8, TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9, - TU_ACCESS_SYSMEM_READ = 1 << 10, - TU_ACCESS_SYSMEM_WRITE = 1 << 11, + /* Accesses by the host */ + TU_ACCESS_HOST_READ = 1 << 10, + TU_ACCESS_HOST_WRITE = 1 << 11, + + /* Accesses by a GPU engine which bypasses any cache. e.g. writes via + * CP_EVENT_WRITE::BLIT and the CP are SYSMEM_WRITE. + */ + TU_ACCESS_SYSMEM_READ = 1 << 12, + TU_ACCESS_SYSMEM_WRITE = 1 << 13, + + /* Set if a WFI is required. This can be required for: + * - 2D engine which (on some models) doesn't wait for flushes to complete + * before starting + * - CP draw indirect opcodes, where we need to wait for any flushes to + * complete but the CP implicitly waits for WFI's to complete and + * therefore we only need a WFI after the flushes. + */ + TU_ACCESS_WFI_READ = 1 << 14, + + /* Set if a CP_WAIT_FOR_ME is required due to the data being read by the CP + * without it waiting for any WFI. + */ + TU_ACCESS_WFM_READ = 1 << 15, - /* Set if a WFI is required due to data being read by the CP or the 2D - * engine. + /* Memory writes from the CP start in-order with draws and event writes, + * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read. */ - TU_ACCESS_WFI_READ = 1 << 12, + TU_ACCESS_CP_WRITE = 1 << 16, TU_ACCESS_READ = TU_ACCESS_UCHE_READ | @@ -736,7 +778,10 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_DEPTH_READ | TU_ACCESS_CCU_COLOR_INCOHERENT_READ | TU_ACCESS_CCU_DEPTH_INCOHERENT_READ | - TU_ACCESS_SYSMEM_READ, + TU_ACCESS_HOST_READ | + TU_ACCESS_SYSMEM_READ | + TU_ACCESS_WFI_READ | + TU_ACCESS_WFM_READ, TU_ACCESS_WRITE = TU_ACCESS_UCHE_WRITE | @@ -744,7 +789,9 @@ enum tu_cmd_access_mask { TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE | TU_ACCESS_CCU_DEPTH_WRITE | TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE | - TU_ACCESS_SYSMEM_WRITE, + TU_ACCESS_HOST_WRITE | + TU_ACCESS_SYSMEM_WRITE | + TU_ACCESS_CP_WRITE, TU_ACCESS_ALL = TU_ACCESS_READ | @@ -758,18 +805,31 @@ enum tu_cmd_flush_bits { TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3, TU_CMD_FLAG_CACHE_FLUSH = 1 << 4, TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5, + TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6, + TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7, + TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8, TU_CMD_FLAG_ALL_FLUSH = TU_CMD_FLAG_CCU_FLUSH_DEPTH | TU_CMD_FLAG_CCU_FLUSH_COLOR | - TU_CMD_FLAG_CACHE_FLUSH, + TU_CMD_FLAG_CACHE_FLUSH | + /* Treat the CP as a sort of "cache" which may need to be "flushed" via + * waiting for writes to land with WAIT_FOR_MEM_WRITES. + */ + TU_CMD_FLAG_WAIT_MEM_WRITES, - TU_CMD_FLAG_ALL_INVALIDATE = + TU_CMD_FLAG_GPU_INVALIDATE = TU_CMD_FLAG_CCU_INVALIDATE_DEPTH | TU_CMD_FLAG_CCU_INVALIDATE_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE, - TU_CMD_FLAG_WFI = 1 << 6, + TU_CMD_FLAG_ALL_INVALIDATE = + TU_CMD_FLAG_GPU_INVALIDATE | + /* Treat the CP as a sort of "cache" which may need to be "invalidated" + * via waiting for UCHE/CCU flushes to land with WFI/WFM. + */ + TU_CMD_FLAG_WAIT_FOR_IDLE | + TU_CMD_FLAG_WAIT_FOR_ME, }; /* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty @@ -815,28 +875,22 @@ struct tu_cmd_state /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */ struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT]; - struct tu_cs_entry vertex_buffers_ib; - struct tu_cs_entry shader_const_ib[MESA_SHADER_STAGES]; - struct tu_cs_entry desc_sets_ib, desc_sets_load_ib; - struct tu_cs_entry ia_gmem_ib, ia_sysmem_ib; - - /* Stream output buffers */ - struct - { - struct tu_buffer *buffers[IR3_MAX_SO_BUFFERS]; - VkDeviceSize offsets[IR3_MAX_SO_BUFFERS]; - VkDeviceSize sizes[IR3_MAX_SO_BUFFERS]; - } streamout_buf; + struct tu_draw_state vertex_buffers; + struct tu_draw_state shader_const[MESA_SHADER_STAGES]; + struct tu_draw_state desc_sets; - uint8_t streamout_reset; - uint8_t streamout_enabled; + struct tu_draw_state vs_params; /* Index buffer */ - struct tu_buffer *index_buffer; - uint64_t index_offset; - uint32_t index_type; - uint32_t max_index_count; uint64_t index_va; + uint32_t max_index_count; + uint8_t index_size; + + /* because streamout base has to be 32-byte aligned + * there is an extra offset to deal with when it is + * unaligned + */ + uint8_t streamout_offset[IR3_MAX_SO_BUFFERS]; /* Renderpasses are tricky, because we may need to flush differently if * using sysmem vs. gmem and therefore we have to delay any flushing that @@ -852,14 +906,20 @@ struct tu_cmd_state const struct tu_render_pass *pass; const struct tu_subpass *subpass; const struct tu_framebuffer *framebuffer; - - struct tu_tiling_config tiling_config; + VkRect2D render_area; struct tu_cs_entry tile_store_ib; + + bool xfb_used; + bool has_tess; + bool has_subpass_predication; + bool predication_active; }; struct tu_cmd_pool { + struct vk_object_base base; + VkAllocationCallbacks alloc; struct list_head cmd_buffers; struct list_head free_cmd_buffers; @@ -883,6 +943,18 @@ enum tu_cmd_buffer_status TU_CMD_BUFFER_STATUS_PENDING, }; +#ifndef MSM_SUBMIT_BO_READ +#define MSM_SUBMIT_BO_READ 0x0001 +#define MSM_SUBMIT_BO_WRITE 0x0002 +#define MSM_SUBMIT_BO_DUMP 0x0004 + +struct drm_msm_gem_submit_bo { + uint32_t flags; /* in, mask of MSM_SUBMIT_BO_x */ + uint32_t handle; /* in, GEM handle */ + uint64_t presumed; /* in/out, presumed buffer address */ +}; +#endif + struct tu_bo_list { uint32_t count; @@ -905,31 +977,9 @@ tu_bo_list_add(struct tu_bo_list *list, VkResult tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other); -/* This struct defines the layout of the scratch_bo */ -struct tu6_control -{ - uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */ - uint32_t _pad0; - volatile uint32_t vsc_overflow; - uint32_t _pad1; - /* flag set from cmdstream when VSC overflow detected: */ - uint32_t vsc_scratch; - uint32_t _pad2; - uint32_t _pad3; - uint32_t _pad4; - - /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ - struct { - uint32_t offset; - uint32_t pad[7]; - } flush_base[4]; -}; - -#define ctrl_offset(member) offsetof(struct tu6_control, member) - struct tu_cmd_buffer { - VK_LOADER_DATA _loader_data; + struct vk_object_base base; struct tu_device *device; @@ -961,13 +1011,8 @@ struct tu_cmd_buffer struct tu_cs draw_epilogue_cs; struct tu_cs sub_cs; - struct tu_bo scratch_bo; - - struct tu_bo vsc_draw_strm; - struct tu_bo vsc_prim_strm; uint32_t vsc_draw_strm_pitch; uint32_t vsc_prim_strm_pitch; - bool use_vsc_data; }; /* Temporary struct for tracking a register state to be written, used by @@ -1005,36 +1050,14 @@ tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer, struct tu_event { + struct vk_object_base base; struct tu_bo bo; }; -static inline gl_shader_stage -vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage) -{ - assert(__builtin_popcount(vk_stage) == 1); - return ffs(vk_stage) - 1; -} - -static inline VkShaderStageFlagBits -mesa_to_vk_shader_stage(gl_shader_stage mesa_stage) -{ - return (1 << mesa_stage); -} - -#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1) - -#define tu_foreach_stage(stage, stage_bits) \ - for (gl_shader_stage stage, \ - __tmp = (gl_shader_stage)((stage_bits) &TU_STAGE_MASK); \ - stage = __builtin_ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage))) - -uint32_t -tu6_stage2opcode(gl_shader_stage type); -enum a6xx_state_block -tu6_stage2shadersb(gl_shader_stage type); - struct tu_shader_module { + struct vk_object_base base; + unsigned char sha1[20]; uint32_t code_size; @@ -1069,7 +1092,6 @@ tu_shader_destroy(struct tu_device *dev, struct tu_program_descriptor_linkage { - struct ir3_ubo_analysis_state ubo_state; struct ir3_const_state const_state; uint32_t constlen; @@ -1079,6 +1101,8 @@ struct tu_program_descriptor_linkage struct tu_pipeline { + struct vk_object_base base; + struct tu_cs cs; struct tu_pipeline_layout *layout; @@ -1087,8 +1111,6 @@ struct tu_pipeline VkShaderStageFlags active_stages; uint32_t active_desc_sets; - struct tu_streamout_state streamout; - /* mask of enabled dynamic states * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used */ @@ -1098,24 +1120,21 @@ struct tu_pipeline /* gras_su_cntl without line width, used for dynamic line width state */ uint32_t gras_su_cntl; + /* draw states for the pipeline */ + struct tu_draw_state load_state, rast_state, ds_state, blend_state; + struct { - struct tu_bo binary_bo; - struct tu_cs_entry state_ib; - struct tu_cs_entry binning_state_ib; + struct tu_draw_state state; + struct tu_draw_state binning_state; struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES]; } program; struct { - struct tu_cs_entry state_ib; - } load_state; - - struct - { - struct tu_cs_entry state_ib; - struct tu_cs_entry binning_state_ib; + struct tu_draw_state state; + struct tu_draw_state binning_state; uint32_t bindings_used; } vi; @@ -1127,18 +1146,12 @@ struct tu_pipeline struct { - struct tu_cs_entry state_ib; - } rast; - - struct - { - struct tu_cs_entry state_ib; - } ds; - - struct - { - struct tu_cs_entry state_ib; - } blend; + uint32_t patch_type; + uint32_t param_stride; + uint32_t hs_bo_regid; + uint32_t ds_bo_regid; + bool upper_left_domain_origin; + } tess; struct { @@ -1176,9 +1189,12 @@ tu6_emit_xs_config(struct tu_cs *cs, void tu6_emit_vpc(struct tu_cs *cs, const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *hs, + const struct ir3_shader_variant *ds, const struct ir3_shader_variant *gs, const struct ir3_shader_variant *fs, - struct tu_streamout_state *tf); + uint32_t patch_control_points, + bool vshs_workgroup); void tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs); @@ -1247,10 +1263,10 @@ tu6_base_format(VkFormat format) return tu6_format_color(format, TILE6_LINEAR).fmt; } -enum a6xx_depth_format tu6_pipe2depth(VkFormat format); - struct tu_image { + struct vk_object_base base; + VkImageType type; /* The original VkFormat provided by the client. This may not match any * of the actual surface formats. @@ -1265,7 +1281,8 @@ struct tu_image uint32_t layer_count; VkSampleCountFlagBits samples; - struct fdl_layout layout; + struct fdl_layout layout[3]; + uint32_t total_size; unsigned queue_family_mask; bool exclusive; @@ -1297,13 +1314,10 @@ tu_get_levelCount(const struct tu_image *image, : range->levelCount; } -enum a3xx_msaa_samples -tu_msaa_samples(uint32_t samples); -enum a6xx_tex_fetchsize -tu6_fetchsize(VkFormat format); - struct tu_image_view { + struct vk_object_base base; + struct tu_image *image; /**< VkImageViewCreateInfo::image */ uint64_t base_addr; @@ -1337,9 +1351,16 @@ struct tu_image_view uint32_t RB_2D_DST_INFO; uint32_t RB_BLIT_DST_INFO; + + /* for d32s8 separate stencil */ + uint64_t stencil_base_addr; + uint32_t stencil_layer_size; + uint32_t stencil_PITCH; }; struct tu_sampler_ycbcr_conversion { + struct vk_object_base base; + VkFormat format; VkSamplerYcbcrModelConversion ycbcr_model; VkSamplerYcbcrRange ycbcr_range; @@ -1349,6 +1370,8 @@ struct tu_sampler_ycbcr_conversion { }; struct tu_sampler { + struct vk_object_base base; + uint32_t descriptor[A6XX_TEX_SAMP_DWORDS]; struct tu_sampler_ycbcr_conversion *ycbcr_sampler; }; @@ -1362,8 +1385,11 @@ tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t void tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); -enum a6xx_tex_filter -tu6_tex_filter(VkFilter filter, unsigned aniso); +void +tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); + +#define tu_image_view_stencil(iview, x) \ + ((iview->x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT)) VkResult tu_image_create(VkDevice _device, @@ -1381,11 +1407,14 @@ tu_image_from_gralloc(VkDevice device_h, VkImage *out_image_h); void -tu_image_view_init(struct tu_image_view *view, - const VkImageViewCreateInfo *pCreateInfo); +tu_image_view_init(struct tu_image_view *iview, + const VkImageViewCreateInfo *pCreateInfo, + bool limited_z24s8); struct tu_buffer_view { + struct vk_object_base base; + uint32_t descriptor[A6XX_TEX_CONST_DWORDS]; struct tu_buffer *buffer; @@ -1402,14 +1431,35 @@ struct tu_attachment_info struct tu_framebuffer { + struct vk_object_base base; + uint32_t width; uint32_t height; uint32_t layers; + /* size of the first tile */ + VkExtent2D tile0; + /* number of tiles */ + VkExtent2D tile_count; + + /* size of the first VSC pipe */ + VkExtent2D pipe0; + /* number of VSC pipes */ + VkExtent2D pipe_count; + + /* pipe register values */ + uint32_t pipe_config[MAX_VSC_PIPES]; + uint32_t pipe_sizes[MAX_VSC_PIPES]; + uint32_t attachment_count; struct tu_attachment_info attachments[0]; }; +void +tu_framebuffer_tiling_config(struct tu_framebuffer *fb, + const struct tu_device *device, + const struct tu_render_pass *pass); + struct tu_subpass_barrier { VkPipelineStageFlags src_stage_mask; VkAccessFlags src_access_mask; @@ -1420,7 +1470,6 @@ struct tu_subpass_barrier { struct tu_subpass_attachment { uint32_t attachment; - VkImageLayout layout; }; struct tu_subpass @@ -1433,7 +1482,6 @@ struct tu_subpass struct tu_subpass_attachment depth_stencil_attachment; VkSampleCountFlagBits samples; - bool has_external_src, has_external_dst; uint32_t srgb_cntl; @@ -1448,12 +1496,17 @@ struct tu_render_pass_attachment VkImageAspectFlags clear_mask; bool load; bool store; - VkImageLayout initial_layout, final_layout; int32_t gmem_offset; + /* for D32S8 separate stencil: */ + bool load_stencil; + bool store_stencil; + int32_t gmem_offset_stencil; }; struct tu_render_pass { + struct vk_object_base base; + uint32_t attachment_count; uint32_t subpass_count; uint32_t gmem_pixels; @@ -1466,6 +1519,8 @@ struct tu_render_pass struct tu_query_pool { + struct vk_object_base base; + VkQueryType type; uint32_t stride; uint64_t size; @@ -1473,10 +1528,26 @@ struct tu_query_pool struct tu_bo bo; }; +enum tu_semaphore_kind +{ + TU_SEMAPHORE_NONE, + TU_SEMAPHORE_SYNCOBJ, +}; + +struct tu_semaphore_part +{ + enum tu_semaphore_kind kind; + union { + uint32_t syncobj; + }; +}; + struct tu_semaphore { - uint32_t syncobj; - uint32_t temp_syncobj; + struct vk_object_base base; + + struct tu_semaphore_part permanent; + struct tu_semaphore_part temporary; }; void @@ -1502,14 +1573,11 @@ tu_update_descriptor_set_with_template( VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData); -int -tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id); - -int -tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size); - -int -tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base); +VkResult +tu_physical_device_init(struct tu_physical_device *device, + struct tu_instance *instance); +VkResult +tu_enumerate_devices(struct tu_instance *instance); int tu_drm_submitqueue_new(const struct tu_device *dev, @@ -1519,21 +1587,6 @@ tu_drm_submitqueue_new(const struct tu_device *dev, void tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id); -uint32_t -tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags); -uint32_t -tu_gem_import_dmabuf(const struct tu_device *dev, - int prime_fd, - uint64_t size); -int -tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle); -void -tu_gem_close(const struct tu_device *dev, uint32_t gem_handle); -uint64_t -tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle); -uint64_t -tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle); - #define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \ \ static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \