#include "main/macros.h"
#include "util/list.h"
#include "util/macros.h"
+#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "wsi_common.h"
#include "tu_descriptor_set.h"
#include "tu_extensions.h"
+#include "tu_util.h"
/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
uint32_t SP_UNKNOWN_A0F8;
} magic;
+ int msm_major_version;
+ int msm_minor_version;
+
/* This is the drivers on-disk cache used as a fallback as opposed to
* the pipeline cache defined by apps.
*/
void *map;
};
+enum global_shader { /* slot identifiers for the shader array in struct tu6_global (sized by GLOBAL_SH_COUNT) */
+ GLOBAL_SH_VS,
+ GLOBAL_SH_VS_LAYER,
+ GLOBAL_SH_GS_LAYER,
+ GLOBAL_SH_FS_BLIT,
+ GLOBAL_SH_FS_CLEAR0, /* first slot of the contiguous FS_CLEAR range */
+ GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS, /* last slot of the range, which therefore spans MAX_RTS + 1 values */
+ GLOBAL_SH_COUNT, /* total number of slots; relies on being the final enumerator */
+};
+
+/* This struct defines the layout of the global_bo; members are located via offsetof() (see gb_offset), so field order and padding are part of the layout */
+struct tu6_global
+{
+ /* 6 bcolor_entry entries, one for each VK_BORDER_COLOR */
+ uint8_t border_color[128 * 6]; /* 128 bytes reserved per entry */
+
+ /* clear/blit shaders, all <= 16 instrs (16 instr = 1 instrlen unit) */
+ instr_t shaders[GLOBAL_SH_COUNT][16]; /* one 16-instruction slot per enum global_shader value */
+
+ uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
+ uint32_t _pad0; /* places vsc_draw_overflow 8 bytes after seqno_dummy */
+ volatile uint32_t vsc_draw_overflow; /* volatile: presumably modified outside normal program flow (e.g. by the GPU) - TODO confirm */
+ uint32_t _pad1; /* places vsc_prim_overflow 8 bytes after vsc_draw_overflow */
+ volatile uint32_t vsc_prim_overflow; /* volatile for the same (assumed) reason as vsc_draw_overflow */
+ uint32_t _pad2[3]; /* seqno_dummy through _pad2 totals 8 dwords (32 bytes) */
+
+ /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. NOTE(review): the boundary also depends on sizeof(instr_t) and the BO base address, neither visible here - confirm */
+ struct {
+ uint32_t offset;
+ uint32_t pad[7]; /* pads each entry to 8 dwords (32 bytes) */
+ } flush_base[4];
+};
+#define gb_offset(member) offsetof(struct tu6_global, member) /* byte offset of a struct tu6_global member */
+#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member)) /* global_bo.iova plus the member's offset; cmd must provide ->device->global_bo */
+
+void tu_init_clear_blit_shaders(struct tu6_global *global); /* defined elsewhere; presumably populates global->shaders - TODO confirm */
+
+/* extra space in vsc draw/prim streams */
+#define VSC_PAD 0x40
+
struct tu_device
{
VK_LOADER_DATA _loader_data;
int queue_count[TU_MAX_QUEUE_FAMILIES];
struct tu_physical_device *physical_device;
+ int _lost;
struct ir3_compiler *compiler;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct tu_pipeline_cache *mem_cache;
- struct tu_bo vsc_draw_strm;
- struct tu_bo vsc_prim_strm;
- uint32_t vsc_draw_strm_pitch;
- uint32_t vsc_prim_strm_pitch;
-
#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */
/* Currently the kernel driver uses a 32-bit GPU address space, but it
bool initialized;
} scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];
- struct tu_bo border_color;
+ struct tu_bo global_bo;
struct tu_device_extension_table enabled_extensions;
+
+ uint32_t vsc_draw_strm_pitch;
+ uint32_t vsc_prim_strm_pitch;
+ mtx_t vsc_pitch_mtx;
};
+VkResult _tu_device_set_lost(struct tu_device *device,
+ const char *file, int line,
+ const char *msg, ...) PRINTFLIKE(4, 5); /* msg (4th parameter) is a printf-style format, its arguments start at the 5th */
+#define tu_device_set_lost(dev, ...) \
+ _tu_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__) /* forwards the call site's __FILE__/__LINE__; at least the format string must follow dev */
+
+static inline bool /* true when device->_lost is non-zero */
+tu_device_is_lost(struct tu_device *device)
+{
+ return unlikely(p_atomic_read(&device->_lost)); /* _lost is read via p_atomic_read; unlikely() marks the lost case as the cold path */
+}
+
VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size);
VkResult
uint32_t offset;
};
-struct ts_cs_memory {
+struct tu_cs_memory { /* pairs a uint32_t pointer (map) with a 64-bit value (iova); presumably the CPU mapping and GPU address of one allocation - TODO confirm */
uint32_t *map;
uint64_t iova;
};
{
TU_DRAW_STATE_PROGRAM,
TU_DRAW_STATE_PROGRAM_BINNING,
+ TU_DRAW_STATE_TESS,
TU_DRAW_STATE_VB,
TU_DRAW_STATE_VI,
TU_DRAW_STATE_VI_BINNING,
TU_DRAW_STATE_DS,
TU_DRAW_STATE_BLEND,
TU_DRAW_STATE_VS_CONST,
+ TU_DRAW_STATE_HS_CONST,
+ TU_DRAW_STATE_DS_CONST,
TU_DRAW_STATE_GS_CONST,
TU_DRAW_STATE_FS_CONST,
TU_DRAW_STATE_DESC_SETS,
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
};
-struct tu_tile
-{
- uint8_t pipe;
- uint8_t slot;
- VkOffset2D begin;
- VkOffset2D end;
-};
-
-struct tu_tiling_config
-{
- VkRect2D render_area;
-
- /* position and size of the first tile */
- VkRect2D tile0;
- /* number of tiles */
- VkExtent2D tile_count;
-
- /* size of the first VSC pipe */
- VkExtent2D pipe0;
- /* number of VSC pipes */
- VkExtent2D pipe_count;
-
- /* pipe register values */
- uint32_t pipe_config[MAX_VSC_PIPES];
- uint32_t pipe_sizes[MAX_VSC_PIPES];
-
- /* Whether sysmem rendering must be used */
- bool force_sysmem;
-};
-
enum tu_cmd_dirty_bits
{
TU_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 1,
TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3,
TU_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 4,
TU_CMD_DIRTY_SHADER_CONSTS = 1 << 5,
- TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 6,
/* all draw states were disabled and need to be re-enabled: */
TU_CMD_DIRTY_DRAW_STATE = 1 << 7,
};
-struct tu_streamout_state {
- uint16_t stride[IR3_MAX_SO_BUFFERS];
- uint32_t ncomp[IR3_MAX_SO_BUFFERS];
- uint32_t prog[IR3_MAX_SO_OUTPUTS * 2];
- uint32_t prog_count;
- uint32_t vpc_so_buf_cntl;
-};
-
/* There are only three cache domains we have to care about: the CCU, or
* color cache unit, which is used for color and depth/stencil attachments
* and copy/blit destinations, and is split conceptually into color and depth,
struct tu_cs_entry desc_sets_ib, desc_sets_load_ib;
struct tu_cs_entry ia_gmem_ib, ia_sysmem_ib;
- /* Stream output buffers */
- struct
- {
- struct tu_buffer *buffers[IR3_MAX_SO_BUFFERS];
- VkDeviceSize offsets[IR3_MAX_SO_BUFFERS];
- VkDeviceSize sizes[IR3_MAX_SO_BUFFERS];
- } streamout_buf;
-
- uint8_t streamout_reset;
- uint8_t streamout_enabled;
+ struct tu_draw_state vs_params;
/* Index buffer */
- struct tu_buffer *index_buffer;
- uint64_t index_offset;
- uint32_t index_type;
- uint32_t max_index_count;
uint64_t index_va;
+ uint32_t max_index_count;
+ uint8_t index_size;
+
+ /* because streamout base has to be 32-byte aligned
+ * there is an extra offset to deal with when it is
+ * unaligned
+ */
+ uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];
/* Renderpasses are tricky, because we may need to flush differently if
* using sysmem vs. gmem and therefore we have to delay any flushing that
const struct tu_render_pass *pass;
const struct tu_subpass *subpass;
const struct tu_framebuffer *framebuffer;
-
- struct tu_tiling_config tiling_config;
+ VkRect2D render_area;
struct tu_cs_entry tile_store_ib;
+
+ bool xfb_used;
};
struct tu_cmd_pool
VkResult
tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other);
-/* This struct defines the layout of the scratch_bo */
-struct tu6_control
-{
- uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
- uint32_t _pad0;
- volatile uint32_t vsc_overflow;
- uint32_t _pad1;
- /* flag set from cmdstream when VSC overflow detected: */
- uint32_t vsc_scratch;
- uint32_t _pad2;
- uint32_t _pad3;
- uint32_t _pad4;
-
- /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
- struct {
- uint32_t offset;
- uint32_t pad[7];
- } flush_base[4];
-};
-
-#define ctrl_offset(member) offsetof(struct tu6_control, member)
-
struct tu_cmd_buffer
{
VK_LOADER_DATA _loader_data;
struct tu_cs draw_epilogue_cs;
struct tu_cs sub_cs;
- struct tu_bo scratch_bo;
+ bool has_tess;
- struct tu_bo vsc_draw_strm;
- struct tu_bo vsc_prim_strm;
uint32_t vsc_draw_strm_pitch;
uint32_t vsc_prim_strm_pitch;
- bool use_vsc_data;
};
/* Temporary struct for tracking a register state to be written, used by
struct tu_bo bo;
};
-static inline gl_shader_stage
-vk_to_mesa_shader_stage(VkShaderStageFlagBits vk_stage)
-{
- assert(__builtin_popcount(vk_stage) == 1);
- return ffs(vk_stage) - 1;
-}
-
-static inline VkShaderStageFlagBits
-mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
-{
- return (1 << mesa_stage);
-}
-
-#define TU_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
-
-#define tu_foreach_stage(stage, stage_bits) \
- for (gl_shader_stage stage, \
- __tmp = (gl_shader_stage)((stage_bits) &TU_STAGE_MASK); \
- stage = __builtin_ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
-
-uint32_t
-tu6_stage2opcode(gl_shader_stage type);
-enum a6xx_state_block
-tu6_stage2shadersb(gl_shader_stage type);
-
struct tu_shader_module
{
unsigned char sha1[20];
struct tu_program_descriptor_linkage
{
- struct ir3_ubo_analysis_state ubo_state;
struct ir3_const_state const_state;
uint32_t constlen;
VkShaderStageFlags active_stages;
uint32_t active_desc_sets;
- struct tu_streamout_state streamout;
-
/* mask of enabled dynamic states
* if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
*/
struct
{
- struct tu_bo binary_bo;
struct tu_cs_entry state_ib;
struct tu_cs_entry binning_state_ib;
bool primitive_restart;
} ia;
+ struct
+ {
+ uint32_t patch_type;
+ uint32_t param_stride;
+ uint32_t hs_bo_regid;
+ uint32_t ds_bo_regid;
+ bool upper_left_domain_origin;
+ } tess;
+
struct
{
struct tu_cs_entry state_ib;
void
tu6_emit_vpc(struct tu_cs *cs,
const struct ir3_shader_variant *vs,
+ const struct ir3_shader_variant *hs,
+ const struct ir3_shader_variant *ds,
const struct ir3_shader_variant *gs,
- const struct ir3_shader_variant *fs,
- struct tu_streamout_state *tf);
+ const struct ir3_shader_variant *fs);
void
tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);
return tu6_format_color(format, TILE6_LINEAR).fmt;
}
-enum a6xx_depth_format tu6_pipe2depth(VkFormat format);
-
struct tu_image
{
VkImageType type;
uint32_t layer_count;
VkSampleCountFlagBits samples;
- struct fdl_layout layout;
+ struct fdl_layout layout[3];
+ uint32_t total_size;
unsigned queue_family_mask;
bool exclusive;
: range->levelCount;
}
-enum a3xx_msaa_samples
-tu_msaa_samples(uint32_t samples);
-enum a6xx_tex_fetchsize
-tu6_fetchsize(VkFormat format);
-
struct tu_image_view
{
struct tu_image *image; /**< VkImageViewCreateInfo::image */
void
tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
-enum a6xx_tex_filter
-tu6_tex_filter(VkFilter filter, unsigned aniso);
-
VkResult
tu_image_create(VkDevice _device,
const VkImageCreateInfo *pCreateInfo,
uint32_t height;
uint32_t layers;
+ /* size of the first tile */
+ VkExtent2D tile0;
+ /* number of tiles */
+ VkExtent2D tile_count;
+
+ /* size of the first VSC pipe */
+ VkExtent2D pipe0;
+ /* number of VSC pipes */
+ VkExtent2D pipe_count;
+
+ /* pipe register values */
+ uint32_t pipe_config[MAX_VSC_PIPES];
+ uint32_t pipe_sizes[MAX_VSC_PIPES];
+
uint32_t attachment_count;
struct tu_attachment_info attachments[0];
};
+void
+tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
+ const struct tu_device *device,
+ const struct tu_render_pass *pass); /* only fb is non-const; presumably computes fb's tile/pipe layout fields from device and pass - TODO confirm against the definition */
+
struct tu_subpass_barrier {
VkPipelineStageFlags src_stage_mask;
VkAccessFlags src_access_mask;
struct tu_subpass_attachment
{
uint32_t attachment;
- VkImageLayout layout;
};
struct tu_subpass
struct tu_subpass_attachment depth_stencil_attachment;
VkSampleCountFlagBits samples;
- bool has_external_src, has_external_dst;
uint32_t srgb_cntl;
VkImageAspectFlags clear_mask;
bool load;
bool store;
- VkImageLayout initial_layout, final_layout;
int32_t gmem_offset;
};
struct tu_bo bo;
};
+enum tu_semaphore_kind /* tag stored in tu_semaphore_part::kind */
+{
+ TU_SEMAPHORE_NONE, /* value 0, so a zero-initialized part has this kind */
+ TU_SEMAPHORE_SYNCOBJ, /* presumably: the part's syncobj member is valid - TODO confirm */
+};
+
+struct tu_semaphore_part /* one semaphore payload: a kind tag plus a kind-specific value */
+{
+ enum tu_semaphore_kind kind;
+ union { /* anonymous union with a single member today */
+ uint32_t syncobj; /* presumably meaningful only when kind == TU_SEMAPHORE_SYNCOBJ - TODO confirm */
+ };
+};
+
struct tu_semaphore
{
- uint32_t syncobj;
- uint32_t temp_syncobj;
+ struct tu_semaphore_part permanent; /* replaces the former bare syncobj field */
+ struct tu_semaphore_part temporary; /* replaces the former temp_syncobj field; NOTE(review): presumably holds a temporarily imported payload - confirm */
};
void