X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Ffreedreno%2Fvulkan%2Ftu_private.h;h=661529fa537be7fb2889e1115db5a743f41f4a55;hb=9ff1959ca5d24afe48bec20edf7e3d059d254134;hp=4f78f7163e5f08132873f2534b5860e12036ad2f;hpb=2d3182b429552651f54650fcc9ea53d41fabe6de;p=mesa.git diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 4f78f7163e5..661529fa537 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -96,10 +96,6 @@ typedef uint32_t xcb_window_t; #define MAX_VIEWS 8 /* The Qualcomm driver exposes 0x20000058 */ #define MAX_STORAGE_BUFFER_RANGE 0x20000000 -/* TODO: this isn't a hardware limit, but for a high # of attachments - * we are missing logic to avoid having them all in GMEM at the same time - */ -#define MAX_ATTACHMENTS 64 #define NUM_DEPTH_CLEAR_PIPELINES 3 @@ -317,9 +313,17 @@ struct tu_physical_device unsigned gpu_id; uint32_t gmem_size; + uint64_t gmem_base; uint32_t tile_align_w; uint32_t tile_align_h; + struct { + uint32_t RB_UNKNOWN_8E04_blit; /* for CP_BLIT's */ + uint32_t RB_CCU_CNTL_gmem; /* for GMEM */ + uint32_t PC_UNKNOWN_9805; + uint32_t SP_UNKNOWN_A0F8; + } magic; + /* This is the drivers on-disk cache used as a fallback as opposed to * the pipeline cache defined by apps. */ @@ -334,6 +338,8 @@ enum tu_debug_flags TU_DEBUG_NIR = 1 << 1, TU_DEBUG_IR3 = 1 << 2, TU_DEBUG_NOBIN = 1 << 3, + TU_DEBUG_SYSMEM = 1 << 4, + TU_DEBUG_FORCEBIN = 1 << 5, }; struct tu_instance @@ -457,6 +463,14 @@ struct tu_queue struct tu_fence submit_fence; }; +struct tu_bo +{ + uint32_t gem_handle; + uint64_t size; + uint64_t iova; + void *map; +}; + struct tu_device { VK_LOADER_DATA _loader_data; @@ -477,20 +491,17 @@ struct tu_device /* Backup in-memory cache to be used if the app doesn't provide one */ struct tu_pipeline_cache *mem_cache; + struct tu_bo vsc_data; + struct tu_bo vsc_data2; + uint32_t vsc_data_pitch; + uint32_t vsc_data2_pitch; + struct list_head shader_slabs; mtx_t shader_slab_mutex; struct tu_device_extension_table enabled_extensions; }; -struct tu_bo -{ - uint32_t gem_handle; - uint64_t size; - uint64_t iova; - void *map; -}; - VkResult tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size); VkResult @@ -559,6 +570,7 @@ struct tu_cs uint32_t *reserved_end; uint32_t *end; + struct tu_device *device; enum tu_cs_mode mode; uint32_t next_bo_size; @@ -569,6 +581,10 @@ struct tu_cs struct tu_bo **bos; uint32_t bo_count; uint32_t bo_capacity; + + /* state for cond_exec_start/cond_exec_end */ + uint32_t cond_flags; + uint32_t *cond_dwords; }; struct tu_device_memory @@ -782,7 +798,6 @@ tu_get_perftest_option_name(int id); struct tu_descriptor_state { struct tu_descriptor_set *sets[MAX_SETS]; - uint32_t dirty; uint32_t valid; struct tu_push_descriptor_set push_set; bool push_dirty; @@ -800,16 +815,12 @@ struct tu_tile struct tu_tiling_config { VkRect2D render_area; - uint32_t buffer_cpp[MAX_ATTACHMENTS]; - uint32_t buffer_count; /* position and size of the first tile */ VkRect2D tile0; /* number of tiles */ VkExtent2D tile_count; - uint32_t gmem_offsets[MAX_ATTACHMENTS]; - /* size of the first VSC pipe */ VkExtent2D pipe0; /* number of VSC pipes */ @@ -818,6 +829,9 @@ struct tu_tiling_config /* pipe register values */ uint32_t pipe_config[MAX_VSC_PIPES]; uint32_t pipe_sizes[MAX_VSC_PIPES]; + + /* Whether sysmem rendering must be used */ + bool force_sysmem; }; enum tu_cmd_dirty_bits @@ -827,6 +841,7 @@ enum tu_cmd_dirty_bits TU_CMD_DIRTY_VERTEX_BUFFERS = 1 << 2, TU_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 3, TU_CMD_DIRTY_PUSH_CONSTANTS = 1 << 4, + TU_CMD_DIRTY_STREAMOUT_BUFFERS = 1 << 5, TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 16, TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 17, @@ -834,6 +849,14 @@ enum tu_cmd_dirty_bits TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 19, }; +struct tu_streamout_state { + uint16_t stride[IR3_MAX_SO_BUFFERS]; + uint32_t ncomp[IR3_MAX_SO_BUFFERS]; + uint32_t prog[IR3_MAX_SO_OUTPUTS * 2]; + uint32_t prog_count; + uint32_t vpc_so_buf_cntl; +}; + struct tu_cmd_state { uint32_t dirty; @@ -850,6 +873,17 @@ struct tu_cmd_state struct tu_dynamic_state dynamic; + /* Stream output buffers */ + struct + { + struct tu_buffer *buffers[IR3_MAX_SO_BUFFERS]; + VkDeviceSize offsets[IR3_MAX_SO_BUFFERS]; + VkDeviceSize sizes[IR3_MAX_SO_BUFFERS]; + } streamout_buf; + + uint8_t streamout_reset; + uint8_t streamout_enabled; + /* Index buffer */ struct tu_buffer *index_buffer; uint64_t index_offset; @@ -863,7 +897,6 @@ struct tu_cmd_state struct tu_tiling_config tiling_config; - struct tu_cs_entry tile_load_ib; struct tu_cs_entry tile_store_ib; }; @@ -944,15 +977,14 @@ struct tu_cmd_buffer struct tu_bo_list bo_list; struct tu_cs cs; struct tu_cs draw_cs; + struct tu_cs draw_epilogue_cs; struct tu_cs sub_cs; - uint16_t marker_reg; - uint32_t marker_seqno; - struct tu_bo scratch_bo; uint32_t scratch_seqno; #define VSC_OVERFLOW 0x8 #define VSC_SCRATCH 0x10 +#define VSC_FLUSH 0x20 struct tu_bo vsc_data; struct tu_bo vsc_data2; @@ -963,6 +995,19 @@ struct tu_cmd_buffer bool wait_for_idle; }; +/* Temporary struct for tracking a register state to be written, used by + * a6xx-pack.h and tu_cs_emit_regs() + */ +struct tu_reg_value { + uint32_t reg; + uint64_t value; + bool is_address; + struct tu_bo *bo; + bool bo_write; + uint32_t bo_offset; + uint32_t bo_shift; +}; + unsigned tu6_emit_event_write(struct tu_cmd_buffer *cmd, struct tu_cs *cs, @@ -1052,10 +1097,10 @@ struct tu_descriptor_map /* TODO: avoid fixed size array/justify the size */ unsigned num; /* number of array entries */ unsigned num_desc; /* Number of descriptors (sum of array_size[]) */ - int set[64]; - int binding[64]; - int value[64]; - int array_size[64]; + int set[128]; + int binding[128]; + int value[128]; + int array_size[128]; }; struct tu_shader @@ -1066,6 +1111,7 @@ struct tu_shader struct tu_descriptor_map sampler_map; struct tu_descriptor_map ubo_map; struct tu_descriptor_map ssbo_map; + struct tu_descriptor_map image_map; /* This may be true for vertex shaders. When true, variants[1] is the * binning variant and binning_binary is non-NULL. @@ -1113,7 +1159,7 @@ struct tu_program_descriptor_linkage struct tu_descriptor_map sampler_map; struct tu_descriptor_map ubo_map; struct tu_descriptor_map ssbo_map; - struct ir3_ibo_mapping image_mapping; + struct tu_descriptor_map image_map; }; struct tu_pipeline @@ -1127,6 +1173,8 @@ struct tu_pipeline bool need_indirect_descriptor_sets; VkShaderStageFlags active_stages; + struct tu_streamout_state streamout; + struct { struct tu_bo binary_bo; @@ -1237,17 +1285,24 @@ struct tu_graphics_pipeline_create_info uint32_t custom_blend_mode; }; +enum tu_supported_formats { + FMT_VERTEX = 1, + FMT_TEXTURE = 2, + FMT_COLOR = 4, +}; + struct tu_native_format { - int vtx; /* VFMTn_xxx or -1 */ - int tex; /* TFMTn_xxx or -1 */ - int rb; /* RBn_xxx or -1 */ - int swap; /* enum a3xx_color_swap */ - bool present; /* internal only; always true to external users */ + enum a6xx_format fmt : 8; + enum a3xx_color_swap swap : 8; + enum tu_supported_formats supported : 8; }; -const struct tu_native_format * -tu6_get_native_format(VkFormat format); +struct tu_native_format tu6_get_native_format(VkFormat format); +struct tu_native_format tu6_format_vtx(VkFormat format); +enum a6xx_format tu6_format_gmem(VkFormat format); +struct tu_native_format tu6_format_color(VkFormat format, bool tiled); +struct tu_native_format tu6_format_texture(VkFormat format, bool tiled); void tu_pack_clear_value(const VkClearValue *val, @@ -1260,7 +1315,7 @@ tu_2d_clear_color(const VkClearColorValue *val, VkFormat format, uint32_t buf[4] void tu_2d_clear_zs(const VkClearDepthStencilValue *val, VkFormat format, uint32_t buf[4]); -enum a6xx_2d_ifmt tu6_rb_fmt_to_ifmt(enum a6xx_color_fmt fmt); +enum a6xx_2d_ifmt tu6_fmt_to_ifmt(enum a6xx_format fmt); enum a6xx_depth_format tu6_pipe2depth(VkFormat format); struct tu_image_level @@ -1345,10 +1400,18 @@ tu_image_base(struct tu_image *image, int level, int layer) fdl_surface_offset(&image->layout, level, layer); } +#define tu_image_base_ref(image, level, layer) \ + .bo = image->bo, \ + .bo_offset = (image->bo_offset + fdl_surface_offset(&image->layout, \ + level, layer)) + +#define tu_image_view_base_ref(iview) \ + tu_image_base_ref(iview->image, iview->base_mip, iview->base_layer) + static inline VkDeviceSize tu_image_ubwc_size(struct tu_image *image, int level) { - return image->layout.ubwc_size; + return image->layout.ubwc_layer_size; } static inline uint32_t @@ -1357,14 +1420,28 @@ tu_image_ubwc_pitch(struct tu_image *image, int level) return image->layout.ubwc_slices[level].pitch; } +static inline uint64_t +tu_image_ubwc_surface_offset(struct tu_image *image, int level, int layer) +{ + return image->layout.ubwc_slices[level].offset + + layer * tu_image_ubwc_size(image, level); +} + static inline uint64_t tu_image_ubwc_base(struct tu_image *image, int level, int layer) { return image->bo->iova + image->bo_offset + - image->layout.ubwc_slices[level].offset + - layer * tu_image_ubwc_size(image, level); + tu_image_ubwc_surface_offset(image, level, layer); } +#define tu_image_ubwc_base_ref(image, level, layer) \ + .bo = image->bo, \ + .bo_offset = (image->bo_offset + tu_image_ubwc_surface_offset(image, \ + level, layer)) + +#define tu_image_view_ubwc_base_ref(iview) \ + tu_image_ubwc_base_ref(iview->image, iview->base_mip, iview->base_layer) + enum a6xx_tile_mode tu6_get_image_tile_mode(struct tu_image *image, int level); enum a3xx_msaa_samples @@ -1420,9 +1497,9 @@ tu_image_view_init(struct tu_image_view *view, struct tu_buffer_view { - VkFormat vk_format; - uint64_t range; /**< VkBufferViewCreateInfo::range */ - uint32_t state[4]; + uint32_t descriptor[A6XX_TEX_CONST_DWORDS]; + + struct tu_buffer *buffer; }; void tu_buffer_view_init(struct tu_buffer_view *view, @@ -1501,13 +1578,14 @@ struct tu_render_pass_attachment VkAttachmentLoadOp stencil_load_op; VkAttachmentStoreOp store_op; VkAttachmentStoreOp stencil_store_op; - bool needs_gmem; + int32_t gmem_offset; }; struct tu_render_pass { uint32_t attachment_count; uint32_t subpass_count; + uint32_t gmem_pixels; struct tu_subpass_attachment *subpass_attachments; struct tu_render_pass_attachment *attachments; struct tu_subpass subpasses[0]; @@ -1520,12 +1598,11 @@ tu_device_finish_meta(struct tu_device *device); struct tu_query_pool { + VkQueryType type; uint32_t stride; - uint32_t availability_offset; uint64_t size; - char *ptr; - VkQueryType type; - uint32_t pipeline_stats_mask; + uint32_t pipeline_statistics; + struct tu_bo bo; }; struct tu_semaphore @@ -1571,6 +1648,9 @@ tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id); int tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size); +int +tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base); + int tu_drm_submitqueue_new(const struct tu_device *dev, int priority, @@ -1594,6 +1674,21 @@ tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle); uint64_t tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle); + +void +tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + uint32_t attachment, + const VkClearValue *value, + const VkClearRect *rect); + +void +tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + uint32_t attachment, + uint8_t component_mask, + const VkClearValue *value); + #define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \ \ static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \