X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_private.h;h=6b54a8ad60011d04923494e156b45fe40dcf07bd;hb=f3e37f5d262606f1a0f5a3073835ac0ecc41f629;hp=ee69ccf6f0f07a8dda3e7d6a688abfc86714f906;hpb=1b0ceba92582454e79f68a81fdf575dcfe07e5a6;p=mesa.git

diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index ee69ccf6f0f..6b54a8ad600 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -40,7 +40,7 @@
 #include <memcheck.h>
 #define VG(x) x
 #else
-#define VG(x)
+#define VG(x) ((void)0)
 #endif
 
 #include "c11/threads.h"
@@ -49,7 +49,6 @@
 #include "util/macros.h"
 #include "util/list.h"
 #include "util/xmlconfig.h"
-#include "main/macros.h"
 #include "vk_alloc.h"
 #include "vk_debug_report.h"
 
@@ -65,8 +64,6 @@
 #include "radv_extensions.h"
 #include "sid.h"
 
-#include <llvm-c/TargetMachine.h>
-
 /* Pre-declarations needed for WSI entrypoints */
 struct wl_surface;
 struct wl_display;
@@ -109,19 +106,16 @@ struct gfx10_format {
 
 #include "gfx10_format_table.h"
 
-enum radv_mem_heap {
-	RADV_MEM_HEAP_VRAM,
-	RADV_MEM_HEAP_VRAM_CPU_ACCESS,
-	RADV_MEM_HEAP_GTT,
-	RADV_MEM_HEAP_COUNT
-};
-
-enum radv_mem_type {
-	RADV_MEM_TYPE_VRAM,
-	RADV_MEM_TYPE_GTT_WRITE_COMBINE,
-	RADV_MEM_TYPE_VRAM_CPU_ACCESS,
-	RADV_MEM_TYPE_GTT_CACHED,
-	RADV_MEM_TYPE_COUNT
+enum radv_secure_compile_type {
+	RADV_SC_TYPE_INIT_SUCCESS,
+	RADV_SC_TYPE_INIT_FAILURE,
+	RADV_SC_TYPE_COMPILE_PIPELINE,
+	RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED,
+	RADV_SC_TYPE_READ_DISK_CACHE,
+	RADV_SC_TYPE_WRITE_DISK_CACHE,
+	RADV_SC_TYPE_FORK_DEVICE,
+	RADV_SC_TYPE_DESTROY_DEVICE,
+	RADV_SC_TYPE_COUNT
 };
 
 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
@@ -254,7 +248,7 @@ void radv_logi_v(const char *format, va_list va);
 				fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
 		})
 #else
-#define radv_assert(x)
+#define radv_assert(x) do {} while(0)
 #endif
 
 #define stub_return(v)					\
@@ -269,18 +263,30 @@ void radv_logi_v(const char *format, va_list va);
 		return;					\
 	} while (0)
 
-void *radv_lookup_entrypoint_unchecked(const char *name);
-void *radv_lookup_entrypoint_checked(const char *name,
-                                    uint32_t core_version,
-                                    const struct radv_instance_extension_table *instance,
-                                    const struct radv_device_extension_table *device);
-void *radv_lookup_physical_device_entrypoint_checked(const char *name,
-                                                     uint32_t core_version,
-                                                     const struct radv_instance_extension_table *instance);
+int radv_get_instance_entrypoint_index(const char *name);
+int radv_get_device_entrypoint_index(const char *name);
+int radv_get_physical_device_entrypoint_index(const char *name);
+
+const char *radv_get_instance_entry_name(int index);
+const char *radv_get_physical_device_entry_name(int index);
+const char *radv_get_device_entry_name(int index);
+
+bool radv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
+					 const struct radv_instance_extension_table *instance);
+bool radv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
+						const struct radv_instance_extension_table *instance);
+bool radv_device_entrypoint_is_enabled(int index, uint32_t core_version,
+				       const struct radv_instance_extension_table *instance,
+				       const struct radv_device_extension_table *device);
+
+void *radv_lookup_entrypoint(const char *name);
 
 struct radv_physical_device {
 	VK_LOADER_DATA                              _loader_data;
 
+	/* Link in radv_instance::physical_devices */
+	struct list_head                            link;
+
 	struct radv_instance *                       instance;
 
 	struct radeon_winsys *ws;
@@ -305,6 +311,9 @@ struct radv_physical_device {
 	/* Whether to enable NGG. */
 	bool use_ngg;
 
+	/* Whether to enable NGG GS. */
+	bool use_ngg_gs;
+
 	/* Whether to enable NGG streamout. */
 	bool use_ngg_streamout;
 
@@ -322,7 +331,8 @@ struct radv_physical_device {
 	struct disk_cache *                          disk_cache;
 
 	VkPhysicalDeviceMemoryProperties memory_properties;
-	enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
+	enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
+	enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
 
 	drmPciBusInfo bus_info;
 
@@ -335,23 +345,34 @@ struct radv_instance {
 	VkAllocationCallbacks                       alloc;
 
 	uint32_t                                    apiVersion;
-	int                                         physicalDeviceCount;
-	struct radv_physical_device                 physicalDevices[RADV_MAX_DRM_DEVICES];
 
 	char *                                      engineName;
 	uint32_t                                    engineVersion;
 
 	uint64_t debug_flags;
 	uint64_t perftest_flags;
+	uint8_t num_sc_threads;
 
 	struct vk_debug_report_instance             debug_report_callbacks;
 
 	struct radv_instance_extension_table enabled_extensions;
+	struct radv_instance_dispatch_table          dispatch;
+	struct radv_physical_device_dispatch_table   physical_device_dispatch;
+	struct radv_device_dispatch_table            device_dispatch;
+
+	bool                                        physical_devices_enumerated;
+	struct list_head                            physical_devices;
 
 	struct driOptionCache dri_options;
 	struct driOptionCache available_dri_options;
 };
 
+static inline
+bool radv_device_use_secure_compile(struct radv_instance *instance)
+{
+   return instance->num_sc_threads;
+}
+
 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
 void radv_finish_wsi(struct radv_physical_device *physical_device);
 
@@ -393,6 +414,11 @@ struct radv_pipeline_key {
 	uint32_t has_multiview_view_index : 1;
 	uint32_t optimisations_disabled : 1;
 	uint8_t topology;
+
+	/* Non-zero if a required subgroup size is specified via
+	 * VK_EXT_subgroup_size_control.
+	 */
+	uint8_t compute_subgroup_size;
 };
 
 struct radv_shader_binary;
@@ -475,10 +501,15 @@ struct radv_meta_state {
 		VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
 		VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
 		VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-	} clear[1 + MAX_SAMPLES_LOG2];
+
+		VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+		VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+		VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+	} clear[MAX_SAMPLES_LOG2];
 
 	VkPipelineLayout                          clear_color_p_layout;
 	VkPipelineLayout                          clear_depth_p_layout;
+	VkPipelineLayout                          clear_depth_unrestricted_p_layout;
 
 	/* Optimized compute fast HTILE clear for stencil or depth only. */
 	VkPipeline clear_htile_mask_pipeline;
@@ -518,7 +549,7 @@ struct radv_meta_state {
 		VkPipeline depth_only_pipeline[5];
 
 		VkPipeline stencil_only_pipeline[5];
-	} blit2d[1 + MAX_SAMPLES_LOG2];
+	} blit2d[MAX_SAMPLES_LOG2];
 
 	VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
 	VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
@@ -620,10 +651,10 @@ struct radv_meta_state {
 
 	struct {
 		VkPipelineLayout                          p_layout;
-		VkPipeline                                decompress_pipeline;
+		VkPipeline                                decompress_pipeline[NUM_DEPTH_DECOMPRESS_PIPELINES];
 		VkPipeline                                resummarize_pipeline;
 		VkRenderPass                              pass;
-	} depth_decomp[1 + MAX_SAMPLES_LOG2];
+	} depth_decomp[MAX_SAMPLES_LOG2];
 
 	struct {
 		VkPipelineLayout                          p_layout;
@@ -680,12 +711,15 @@ struct radv_queue {
 	int queue_idx;
 	VkDeviceQueueCreateFlags flags;
 
-	uint32_t scratch_size;
-	uint32_t compute_scratch_size;
+	uint32_t scratch_size_per_wave;
+	uint32_t scratch_waves;
+	uint32_t compute_scratch_size_per_wave;
+	uint32_t compute_scratch_waves;
 	uint32_t esgs_ring_size;
 	uint32_t gsvs_ring_size;
 	bool has_tess_rings;
 	bool has_gds;
+	bool has_gds_oa;
 	bool has_sample_positions;
 
 	struct radeon_winsys_bo *scratch_bo;
@@ -699,6 +733,9 @@ struct radv_queue {
 	struct radeon_cmdbuf *initial_preamble_cs;
 	struct radeon_cmdbuf *initial_full_flush_preamble_cs;
 	struct radeon_cmdbuf *continue_preamble_cs;
+
+	struct list_head pending_submissions;
+	pthread_mutex_t pending_mutex;
 };
 
 struct radv_bo_list {
@@ -707,6 +744,41 @@ struct radv_bo_list {
 	pthread_mutex_t mutex;
 };
 
+VkResult radv_bo_list_add(struct radv_device *device,
+			  struct radeon_winsys_bo *bo);
+void radv_bo_list_remove(struct radv_device *device,
+			 struct radeon_winsys_bo *bo);
+
+struct radv_secure_compile_process {
+	/* Secure process file descriptors. Used to communicate between the
+	 * user facing device and the idle forked device used to fork a clean
+	 * process for each new pipeline compile.
+	 */
+	int fd_secure_input;
+	int fd_secure_output;
+
+	/* FIFO file descriptors used to communicate between the user facing
+	 * device and the secure process that does the actual secure compile.
+	 */
+	int fd_server;
+	int fd_client;
+
+	/* Secure compile process id */
+	pid_t sc_pid;
+
+	/* Is the secure compile process currently in use by a thread */
+	bool in_use;
+};
+
+struct radv_secure_compile_state {
+	struct radv_secure_compile_process *secure_compile_processes;
+	uint32_t secure_compile_thread_counter;
+	mtx_t secure_compile_mutex;
+
+	/* Unique process ID used to build name for FIFO file descriptor */
+	char *uid;
+};
+
 struct radv_device {
 	VK_LOADER_DATA                              _loader_data;
 
@@ -766,6 +838,7 @@ struct radv_device {
 	uint64_t dmesg_timestamp;
 
 	struct radv_device_extension_table enabled_extensions;
+	struct radv_device_dispatch_table dispatch;
 
 	/* Whether the app has enabled the robustBufferAccess feature. */
 	bool robust_buffer_access;
@@ -777,6 +850,25 @@ struct radv_device {
 
 	/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
 	int force_aniso;
+
+	struct radv_secure_compile_state *sc_state;
+
+	/* Condition variable for legacy timelines, to notify waiters when a
+	 * new point gets submitted. */
+	pthread_cond_t timeline_cond;
+
+	/* Thread trace. */
+	struct radeon_cmdbuf *thread_trace_start_cs[2];
+	struct radeon_cmdbuf *thread_trace_stop_cs[2];
+	struct radeon_winsys_bo *thread_trace_bo;
+	void *thread_trace_ptr;
+	uint32_t thread_trace_buffer_size;
+	int thread_trace_start_frame;
+
+	/* Overallocation. */
+	bool overallocation_disallowed;
+	uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
+	mtx_t overallocation_mutex;
 };
 
 struct radv_device_memory {
@@ -784,8 +876,8 @@ struct radv_device_memory {
 	/* for dedicated allocations */
 	struct radv_image                            *image;
 	struct radv_buffer                           *buffer;
-	uint32_t                                     type_index;
-	VkDeviceSize                                 map_size;
+	uint32_t                                     heap_index;
+	uint64_t                                     alloc_size;
 	void *                                       map;
 	void *                                       user_ptr;
 
@@ -803,6 +895,7 @@ struct radv_descriptor_range {
 struct radv_descriptor_set {
 	const struct radv_descriptor_set_layout *layout;
 	uint32_t size;
+	uint32_t buffer_count;
 
 	struct radeon_winsys_bo *bo;
 	uint64_t va;
@@ -896,7 +989,8 @@ enum radv_dynamic_state_bits {
 	RADV_DYNAMIC_STENCIL_REFERENCE    = 1 << 8,
 	RADV_DYNAMIC_DISCARD_RECTANGLE    = 1 << 9,
 	RADV_DYNAMIC_SAMPLE_LOCATIONS     = 1 << 10,
-	RADV_DYNAMIC_ALL                  = (1 << 11) - 1,
+	RADV_DYNAMIC_LINE_STIPPLE	  = 1 << 11,
+	RADV_DYNAMIC_ALL                  = (1 << 12) - 1,
 };
 
 enum radv_cmd_dirty_bits {
@@ -913,12 +1007,13 @@ enum radv_cmd_dirty_bits {
 	RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE         = 1 << 8,
 	RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE         = 1 << 9,
 	RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS          = 1 << 10,
-	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 11) - 1,
-	RADV_CMD_DIRTY_PIPELINE                          = 1 << 11,
-	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 12,
-	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 13,
-	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 14,
-	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 15,
+	RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE		 = 1 << 11,
+	RADV_CMD_DIRTY_DYNAMIC_ALL                       = (1 << 12) - 1,
+	RADV_CMD_DIRTY_PIPELINE                          = 1 << 12,
+	RADV_CMD_DIRTY_INDEX_BUFFER                      = 1 << 13,
+	RADV_CMD_DIRTY_FRAMEBUFFER                       = 1 << 14,
+	RADV_CMD_DIRTY_VERTEX_BUFFER                     = 1 << 15,
+	RADV_CMD_DIRTY_STREAMOUT_BUFFER                  = 1 << 16,
 };
 
 enum radv_cmd_flush_bits {
@@ -1050,6 +1145,11 @@ struct radv_dynamic_state {
 	struct radv_discard_rectangle_state               discard_rectangle;
 
 	struct radv_sample_locations_state                sample_location;
+
+	struct {
+		uint32_t factor;
+		uint16_t pattern;
+	} line_stipple;
 };
 
 extern const struct radv_dynamic_state default_dynamic_state;
@@ -1108,6 +1208,9 @@ radv_initialise_ds_surface(struct radv_device *device,
 			   struct radv_ds_buffer_info *ds,
 			   struct radv_image_view *iview);
 
+bool
+radv_sc_read(int fd, void *buf, size_t size, bool timeout);
+
 /**
  * Attachment state when recording a renderpass instance.
  *
@@ -1118,6 +1221,7 @@ struct radv_attachment_state {
 	uint32_t                                     cleared_views;
 	VkClearValue                                 clear_value;
 	VkImageLayout                                current_layout;
+	VkImageLayout                                current_stencil_layout;
 	bool                                         current_in_render_loop;
 	struct radv_sample_locations_state	     sample_location;
 
@@ -1181,6 +1285,7 @@ struct radv_cmd_state {
 	unsigned                                     active_occlusion_queries;
 	bool                                         perfect_occlusion_queries_enabled;
 	unsigned                                     active_pipeline_queries;
+	unsigned                                     active_pipeline_gds_queries;
 	float					     offset_scale;
 	uint32_t                                      trace_id;
 	uint32_t                                      last_ia_multi_vgt_param;
@@ -1189,6 +1294,10 @@ struct radv_cmd_state {
 	uint32_t last_first_instance;
 	uint32_t last_vertex_offset;
 
+	uint32_t last_sx_ps_downconvert;
+	uint32_t last_sx_blend_opt_epsilon;
+	uint32_t last_sx_blend_opt_control;
+
 	/* Whether CP DMA is busy/idle. */
 	bool dma_is_busy;
 
@@ -1196,7 +1305,15 @@ struct radv_cmd_state {
 	int predication_type; /* -1: disabled, 0: normal, 1: inverted */
 	uint64_t predication_va;
 
+	/* Inheritance info. */
+	VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
+
 	bool context_roll_without_scissor_emitted;
+
+	/* SQTT related state. */
+	uint32_t current_event_type;
+	uint32_t num_events;
+	uint32_t num_layout_transitions;
 };
 
 struct radv_cmd_pool {
@@ -1243,16 +1360,19 @@ struct radv_cmd_buffer {
 	VkShaderStageFlags push_constant_stages;
 	struct radv_descriptor_set meta_push_descriptors;
 
-	struct radv_descriptor_state descriptors[VK_PIPELINE_BIND_POINT_RANGE_SIZE];
+	struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
 
 	struct radv_cmd_buffer_upload upload;
 
-	uint32_t scratch_size_needed;
-	uint32_t compute_scratch_size_needed;
+	uint32_t scratch_size_per_wave_needed;
+	uint32_t scratch_waves_wanted;
+	uint32_t compute_scratch_size_per_wave_needed;
+	uint32_t compute_scratch_waves_wanted;
 	uint32_t esgs_ring_size_needed;
 	uint32_t gsvs_ring_size_needed;
 	bool tess_rings_needed;
-	bool gds_needed; /* for GFX10 streamout */
+	bool gds_needed; /* for GFX10 streamout and NGG GS queries */
+	bool gds_oa_needed; /* for GFX10 streamout */
 	bool sample_positions_needed;
 
 	VkResult record_result;
@@ -1277,7 +1397,7 @@ struct radv_image_view;
 
 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
 
-void si_emit_graphics(struct radv_physical_device *physical_device,
+void si_emit_graphics(struct radv_device *device,
 		      struct radeon_cmdbuf *cs);
 void si_emit_compute(struct radv_physical_device *physical_device,
 		      struct radeon_cmdbuf *cs);
@@ -1342,11 +1462,11 @@ void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
 void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
 					   VkImageAspectFlags aspects,
-					   VkResolveModeFlagBitsKHR resolve_mode);
+					   VkResolveModeFlagBits resolve_mode);
 void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
 void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
 					   VkImageAspectFlags aspects,
-					   VkResolveModeFlagBitsKHR resolve_mode);
+					   VkResolveModeFlagBits resolve_mode);
 void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
 unsigned radv_get_default_max_sample_dist(int log_samples);
 void radv_device_init_msaa(struct radv_device *device);
@@ -1440,14 +1560,11 @@ struct radv_event {
 
 struct radv_shader_module;
 
-#define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0)
-#define RADV_HASH_SHADER_SISCHED             (1 << 1)
-#define RADV_HASH_SHADER_UNSAFE_MATH         (1 << 2)
-#define RADV_HASH_SHADER_NO_NGG              (1 << 3)
-#define RADV_HASH_SHADER_CS_WAVE32           (1 << 4)
-#define RADV_HASH_SHADER_PS_WAVE32           (1 << 5)
-#define RADV_HASH_SHADER_GE_WAVE32           (1 << 6)
-#define RADV_HASH_SHADER_ACO                 (1 << 7)
+#define RADV_HASH_SHADER_NO_NGG              (1 << 0)
+#define RADV_HASH_SHADER_CS_WAVE32           (1 << 1)
+#define RADV_HASH_SHADER_PS_WAVE32           (1 << 2)
+#define RADV_HASH_SHADER_GE_WAVE32           (1 << 3)
+#define RADV_HASH_SHADER_ACO                 (1 << 4)
 
 void
 radv_hash_shaders(unsigned char *hash,
@@ -1551,6 +1668,7 @@ struct radv_pipeline {
  			bool can_use_guardband;
 			uint32_t needed_dynamic_state;
 			bool disable_out_of_order_rast_for_occlusion;
+			uint8_t topology;
 
 			/* Used for rbplus */
 			uint32_t col_format;
@@ -1577,6 +1695,8 @@ static inline bool radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
 
 bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline);
 
+bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline);
+
 bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
 
 struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
@@ -1592,9 +1712,9 @@ struct radv_graphics_pipeline_create_info {
 	bool db_stencil_clear;
 	bool db_depth_disable_expclear;
 	bool db_stencil_disable_expclear;
-	bool db_flush_depth_inplace;
-	bool db_flush_stencil_inplace;
-	bool db_resummarize;
+	bool depth_compress_disable;
+	bool stencil_compress_disable;
+	bool resummarize_enable;
 	uint32_t custom_blend_mode;
 };
 
@@ -1606,6 +1726,15 @@ radv_graphics_pipeline_create(VkDevice device,
 			      const VkAllocationCallbacks *alloc,
 			      VkPipeline *pPipeline);
 
+struct radv_binning_settings {
+	unsigned context_states_per_bin; /* allowed range: [1, 6] */
+	unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+};
+
+struct radv_binning_settings
+radv_get_binning_settings(const struct radv_physical_device *pdev);
+
 struct vk_format_description;
 uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc,
 					  int first_non_void);
@@ -1687,13 +1816,6 @@ struct radv_image {
 	struct radv_image_plane planes[0];
 };
 
-/* Whether the image has a htile that is known consistent with the contents of
- * the image. */
-bool radv_layout_has_htile(const struct radv_image *image,
-                           VkImageLayout layout,
-                           bool in_render_loop,
-                           unsigned queue_mask);
-
 /* Whether the image has a htile  that is known consistent with the contents of
  * the image and is allowed to be in compressed form.
  *
@@ -1924,6 +2046,8 @@ VkResult radv_image_create(VkDevice _device,
 			   const VkAllocationCallbacks* alloc,
 			   VkImage *pImage);
 
+bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
+
 VkResult
 radv_image_from_gralloc(VkDevice device_h,
                        const VkImageCreateInfo *base_info,
@@ -1947,6 +2071,7 @@ radv_create_ahb_memory(struct radv_device *device,
 VkFormat
 radv_select_android_external_format(const void *next, VkFormat default_format);
 
+bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
 
 struct radv_image_view_extra_create_info {
 	bool disable_compression;
@@ -2047,6 +2172,7 @@ void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
 struct radv_subpass_attachment {
 	uint32_t         attachment;
 	VkImageLayout    layout;
+	VkImageLayout    stencil_layout;
 	bool             in_render_loop;
 };
 
@@ -2061,8 +2187,8 @@ struct radv_subpass {
 	struct radv_subpass_attachment *             resolve_attachments;
 	struct radv_subpass_attachment *             depth_stencil_attachment;
 	struct radv_subpass_attachment *             ds_resolve_attachment;
-	VkResolveModeFlagBitsKHR                     depth_resolve_mode;
-	VkResolveModeFlagBitsKHR                     stencil_resolve_mode;
+	VkResolveModeFlagBits                        depth_resolve_mode;
+	VkResolveModeFlagBits                        stencil_resolve_mode;
 
 	/** Subpass has at least one color resolve attachment */
 	bool                                         has_color_resolve;
@@ -2073,6 +2199,9 @@ struct radv_subpass {
 	struct radv_subpass_barrier                  start_barrier;
 
 	uint32_t                                     view_mask;
+
+	VkSampleCountFlagBits                        color_sample_count;
+	VkSampleCountFlagBits                        depth_sample_count;
 	VkSampleCountFlagBits                        max_sample_count;
 };
 
@@ -2086,6 +2215,8 @@ struct radv_render_pass_attachment {
 	VkAttachmentLoadOp                           stencil_load_op;
 	VkImageLayout                                initial_layout;
 	VkImageLayout                                final_layout;
+	VkImageLayout                                stencil_initial_layout;
+	VkImageLayout                                stencil_final_layout;
 
 	/* The subpass id in which the attachment will be used first/last. */
 	uint32_t				     first_subpass_idx;
@@ -2114,13 +2245,67 @@ struct radv_query_pool {
 	uint32_t pipeline_stats_mask;
 };
 
-struct radv_semaphore {
-	/* use a winsys sem for non-exportable */
-	struct radeon_winsys_sem *sem;
+typedef enum {
+	RADV_SEMAPHORE_NONE,
+	RADV_SEMAPHORE_WINSYS,
+	RADV_SEMAPHORE_SYNCOBJ,
+	RADV_SEMAPHORE_TIMELINE,
+} radv_semaphore_kind;
+
+struct radv_deferred_queue_submission;
+
+struct radv_timeline_waiter {
+	struct list_head list;
+	struct radv_deferred_queue_submission *submission;
+	uint64_t value;
+};
+
+struct radv_timeline_point {
+	struct list_head list;
+
+	uint64_t value;
 	uint32_t syncobj;
-	uint32_t temp_syncobj;
+
+	/* Separate from the list to accomodate CPU wait being async, as well
+	 * as prevent point deletion during submission. */
+	unsigned wait_count;
+};
+
+struct radv_timeline {
+	/* Using a pthread mutex to be compatible with condition variables. */
+	pthread_mutex_t mutex;
+
+	uint64_t highest_signaled;
+	uint64_t highest_submitted;
+
+	struct list_head points;
+
+	/* Keep free points on hand so we do not have to recreate syncobjs all
+	 * the time. */
+	struct list_head free_points;
+
+	/* Submissions that are deferred waiting for a specific value to be
+	 * submitted. */
+	struct list_head waiters;
+};
+
+struct radv_semaphore_part {
+	radv_semaphore_kind kind;
+	union {
+		uint32_t syncobj;
+		struct radeon_winsys_sem *ws_sem;
+		struct radv_timeline timeline;
+	};
+};
+
+struct radv_semaphore {
+	struct radv_semaphore_part permanent;
+	struct radv_semaphore_part temporary;
 };
 
+bool radv_queue_internal_submit(struct radv_queue *queue,
+				struct radeon_cmdbuf *cs);
+
 void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
 			     VkPipelineBindPoint bind_point,
 			     struct radv_descriptor_set *set,
@@ -2166,21 +2351,13 @@ struct radv_fence {
 };
 
 /* radv_nir_to_llvm.c */
-struct radv_shader_info;
-struct radv_nir_compiler_options;
+struct radv_shader_args;
 
-void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
-				 struct nir_shader *geom_shader,
-				 struct radv_shader_binary **rbinary,
-				 struct radv_shader_info *info,
-				 const struct radv_nir_compiler_options *option);
-
-void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
-			     struct radv_shader_binary **rbinary,
-			     struct radv_shader_info *info,
-			     struct nir_shader *const *nir,
-			     int nir_count,
-			     const struct radv_nir_compiler_options *options);
+void llvm_compile_shader(struct radv_device *device,
+			 unsigned shader_count,
+			 struct nir_shader *const *shaders,
+			 struct radv_shader_binary **binary,
+			 struct radv_shader_args *args);
 
 unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
 					 gl_shader_stage stage,
@@ -2193,10 +2370,101 @@ struct radv_shader_variant_key;
 void radv_nir_shader_info_pass(const struct nir_shader *nir,
 			       const struct radv_pipeline_layout *layout,
 			       const struct radv_shader_variant_key *key,
-			       struct radv_shader_info *info);
+			       struct radv_shader_info *info,
+			       bool use_aco);
 
 void radv_nir_shader_info_init(struct radv_shader_info *info);
 
+/* radv_sqtt.c */
+struct radv_thread_trace_info {
+	uint32_t cur_offset;
+	uint32_t trace_status;
+	union {
+		uint32_t gfx9_write_counter;
+		uint32_t gfx10_dropped_cntr;
+	};
+};
+
+struct radv_thread_trace_se {
+	struct radv_thread_trace_info info;
+	void *data_ptr;
+	uint32_t shader_engine;
+	uint32_t compute_unit;
+};
+
+struct radv_thread_trace {
+	uint32_t num_traces;
+	struct radv_thread_trace_se traces[4];
+};
+
+bool radv_thread_trace_init(struct radv_device *device);
+void radv_thread_trace_finish(struct radv_device *device);
+bool radv_begin_thread_trace(struct radv_queue *queue);
+bool radv_end_thread_trace(struct radv_queue *queue);
+bool radv_get_thread_trace(struct radv_queue *queue,
+			   struct radv_thread_trace *thread_trace);
+void radv_emit_thread_trace_userdata(struct radeon_cmdbuf *cs,
+				     const void *data, uint32_t num_dwords);
+
+/* radv_rgp.c */
+int radv_dump_thread_trace(struct radv_device *device,
+			   const struct radv_thread_trace *trace);
+
+/* radv_sqtt_layer_.c */
+struct radv_barrier_data {
+	union {
+		struct {
+			uint16_t depth_stencil_expand : 1;
+			uint16_t htile_hiz_range_expand : 1;
+			uint16_t depth_stencil_resummarize : 1;
+			uint16_t dcc_decompress : 1;
+			uint16_t fmask_decompress : 1;
+			uint16_t fast_clear_eliminate : 1;
+			uint16_t fmask_color_expand : 1;
+			uint16_t init_mask_ram : 1;
+			uint16_t reserved : 8;
+		};
+		uint16_t all;
+	} layout_transitions;
+};
+
+/**
+ * Value for the reason field of an RGP barrier start marker originating from
+ * the Vulkan client (does not include PAL-defined values). (Table 15)
+ */
+enum rgp_barrier_reason {
+	RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,
+
+	/* External app-generated barrier reasons, i.e. API synchronization
+	 * commands Range of valid values: [0x00000001 ... 0x7FFFFFFF].
+	 */
+	RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
+	RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC	  = 0x00000002,
+	RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS	  = 0x00000003,
+
+	/* Internal barrier reasons, i.e. implicit synchronization inserted by
+	 * the Vulkan driver Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
+	 */
+	RGP_BARRIER_INTERNAL_BASE                             = 0xC0000000,
+	RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC        = RGP_BARRIER_INTERNAL_BASE + 0,
+	RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC       = RGP_BARRIER_INTERNAL_BASE + 1,
+	RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL	      = RGP_BARRIER_INTERNAL_BASE + 2,
+	RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
+};
+
+void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
+void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
+void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
+void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
+void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
+					   VkImageAspectFlagBits aspects);
+void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
+void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
+				 enum rgp_barrier_reason reason);
+void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
+void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
+				     const struct radv_barrier_data *barrier);
+
 struct radeon_winsys_sem;
 
 uint64_t radv_get_current_time(void);
@@ -2225,6 +2493,10 @@ si_conv_gl_prim_to_vertices(unsigned gl_prim)
 	}
 }
 
+void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
+				       const VkRenderPassBeginInfo *pRenderPassBegin);
+void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer);
+
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
 								\
 	static inline struct __radv_type *			\