#include <memcheck.h>
#define VG(x) x
#else
-#define VG(x)
+#define VG(x) ((void)0)
#endif
#include "c11/threads.h"
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_intel.h>
+#include <vulkan/vulkan_android.h>
#include <vulkan/vk_icd.h>
#include <vulkan/vk_android_native_buffer.h>
#include "wsi_common.h"
#include "wsi_common_display.h"
+/* Helper to determine whether we should compile
+ * any of the Android AHB support.
+ *
+ * To actually enable the extension we also need
+ * the necessary kernel support.
+ */
+#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
+#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 1
+#else
+#define RADV_SUPPORT_ANDROID_HARDWARE_BUFFER 0
+#endif
+
+
struct gfx10_format {
unsigned img_format:9;
RADV_MEM_TYPE_GTT_WRITE_COMBINE,
RADV_MEM_TYPE_VRAM_CPU_ACCESS,
RADV_MEM_TYPE_GTT_CACHED,
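+ /* Uncached variants, used to implement VK_AMD_device_coherent_memory. */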
+ RADV_MEM_TYPE_VRAM_UNCACHED,
+ RADV_MEM_TYPE_GTT_WRITE_COMBINE_VRAM_UNCACHED,
+ RADV_MEM_TYPE_VRAM_CPU_ACCESS_UNCACHED,
+ RADV_MEM_TYPE_GTT_CACHED_VRAM_UNCACHED,
RADV_MEM_TYPE_COUNT
};
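+/* Message types exchanged with the forked secure compile process over its
+ * input/output file descriptors (see struct radv_secure_compile_process).
+ */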
+enum radv_secure_compile_type {
+ RADV_SC_TYPE_INIT_SUCCESS,
+ RADV_SC_TYPE_INIT_FAILURE,
+ RADV_SC_TYPE_COMPILE_PIPELINE,
+ RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED,
+ RADV_SC_TYPE_READ_DISK_CACHE,
+ RADV_SC_TYPE_WRITE_DISK_CACHE,
+ RADV_SC_TYPE_FORK_DEVICE,
+ RADV_SC_TYPE_DESTROY_DEVICE,
+ RADV_SC_TYPE_COUNT
+};
+
#define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
static inline uint32_t
fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
})
#else
-#define radv_assert(x)
+#define radv_assert(x) do {} while(0)
#endif
#define stub_return(v) \
uint64_t debug_flags;
uint64_t perftest_flags;
+ uint8_t num_sc_threads;
struct vk_debug_report_instance debug_report_callbacks;
struct driOptionCache available_dri_options;
};
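+/* Secure compile is enabled when the instance was created with one or more
+ * secure compile threads.
+ */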
+static inline
+bool radv_device_use_secure_compile(struct radv_instance *instance)
+{
+ return instance->num_sc_threads;
+}
+
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
void radv_finish_wsi(struct radv_physical_device *physical_device);
uint32_t has_multiview_view_index : 1;
uint32_t optimisations_disabled : 1;
uint8_t topology;
+
+ /* Non-zero if a required subgroup size is specified via
+ * VK_EXT_subgroup_size_control.
+ */
+ uint8_t compute_subgroup_size;
};
struct radv_shader_binary;
VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- } clear[1 + MAX_SAMPLES_LOG2];
+
+ VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ } clear[MAX_SAMPLES_LOG2];
VkPipelineLayout clear_color_p_layout;
VkPipelineLayout clear_depth_p_layout;
+ VkPipelineLayout clear_depth_unrestricted_p_layout;
/* Optimized compute fast HTILE clear for stencil or depth only. */
VkPipeline clear_htile_mask_pipeline;
VkPipeline depth_only_pipeline[5];
VkPipeline stencil_only_pipeline[5];
- } blit2d[1 + MAX_SAMPLES_LOG2];
+ } blit2d[MAX_SAMPLES_LOG2];
VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
struct {
VkPipelineLayout p_layout;
- VkPipeline decompress_pipeline;
+ VkPipeline decompress_pipeline[NUM_DEPTH_DECOMPRESS_PIPELINES];
VkPipeline resummarize_pipeline;
VkRenderPass pass;
- } depth_decomp[1 + MAX_SAMPLES_LOG2];
+ } depth_decomp[MAX_SAMPLES_LOG2];
struct {
VkPipelineLayout p_layout;
int queue_idx;
VkDeviceQueueCreateFlags flags;
- uint32_t scratch_size;
- uint32_t compute_scratch_size;
+ uint32_t scratch_size_per_wave;
+ uint32_t scratch_waves;
+ uint32_t compute_scratch_size_per_wave;
+ uint32_t compute_scratch_waves;
uint32_t esgs_ring_size;
uint32_t gsvs_ring_size;
bool has_tess_rings;
struct radeon_cmdbuf *initial_preamble_cs;
struct radeon_cmdbuf *initial_full_flush_preamble_cs;
struct radeon_cmdbuf *continue_preamble_cs;
+
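+ /* Queue submissions that are still pending, e.g. because they wait on a
+ * timeline point that has not been submitted yet. Protected by
+ * pending_mutex. */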
+ struct list_head pending_submissions;
+ pthread_mutex_t pending_mutex;
};
struct radv_bo_list {
pthread_mutex_t mutex;
};
+struct radv_secure_compile_process {
+ /* Secure process file descriptors. Used to communicate between the
+ * user-facing device and the idle forked device, which forks a clean
+ * process for each new pipeline compile.
+ */
+ int fd_secure_input;
+ int fd_secure_output;
+
+ /* FIFO file descriptors used to communicate between the user-facing
+ * device and the secure process that does the actual secure compile.
+ */
+ int fd_server;
+ int fd_client;
+
+ /* Secure compile process id */
+ pid_t sc_pid;
+
+ /* Whether the secure compile process is currently in use by a thread */
+ bool in_use;
+};
+
+struct radv_secure_compile_state {
+ struct radv_secure_compile_process *secure_compile_processes;
+ uint32_t secure_compile_thread_counter;
+ mtx_t secure_compile_mutex;
+
+ /* Unique process ID used to build the names of the FIFO file descriptors */
+ char *uid;
+};
+
struct radv_device {
VK_LOADER_DATA _loader_data;
/* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
int force_aniso;
+
+ struct radv_secure_compile_state *sc_state;
+
+ /* Condition variable for legacy timelines, to notify waiters when a
+ * new point gets submitted. */
+ pthread_cond_t timeline_cond;
};
struct radv_device_memory {
VkDeviceSize map_size;
void * map;
void * user_ptr;
+
+#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
+ struct AHardwareBuffer * android_hardware_buffer;
+#endif
};
struct radv_ds_buffer_info *ds,
struct radv_image_view *iview);
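+/* Read exactly 'size' bytes from a secure compile file descriptor,
+ * optionally with a timeout; returns whether the read succeeded.
+ */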
+bool
+radv_sc_read(int fd, void *buf, size_t size, bool timeout);
+
/**
* Attachment state when recording a renderpass instance.
*
uint32_t cleared_views;
VkClearValue clear_value;
VkImageLayout current_layout;
+ VkImageLayout current_stencil_layout;
bool current_in_render_loop;
struct radv_sample_locations_state sample_location;
struct radv_cmd_buffer_upload upload;
- uint32_t scratch_size_needed;
- uint32_t compute_scratch_size_needed;
+ uint32_t scratch_size_per_wave_needed;
+ uint32_t scratch_waves_wanted;
+ uint32_t compute_scratch_size_per_wave_needed;
+ uint32_t compute_scratch_waves_wanted;
uint32_t esgs_ring_size_needed;
uint32_t gsvs_ring_size_needed;
bool tess_rings_needed;
#define RADV_HASH_SHADER_IS_GEOM_COPY_SHADER (1 << 0)
#define RADV_HASH_SHADER_SISCHED (1 << 1)
-#define RADV_HASH_SHADER_UNSAFE_MATH (1 << 2)
-#define RADV_HASH_SHADER_NO_NGG (1 << 3)
-#define RADV_HASH_SHADER_CS_WAVE32 (1 << 4)
-#define RADV_HASH_SHADER_PS_WAVE32 (1 << 5)
-#define RADV_HASH_SHADER_GE_WAVE32 (1 << 6)
-#define RADV_HASH_SHADER_ACO (1 << 7)
+#define RADV_HASH_SHADER_NO_NGG (1 << 2)
+#define RADV_HASH_SHADER_CS_WAVE32 (1 << 3)
+#define RADV_HASH_SHADER_PS_WAVE32 (1 << 4)
+#define RADV_HASH_SHADER_GE_WAVE32 (1 << 5)
+#define RADV_HASH_SHADER_ACO (1 << 6)
void
radv_hash_shaders(unsigned char *hash,
const VkAllocationCallbacks *alloc,
VkPipeline *pPipeline);
+struct radv_binning_settings {
+ unsigned context_states_per_bin; /* allowed range: [1, 6] */
+ unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+};
+
+struct radv_binning_settings
+radv_get_binning_settings(const struct radv_physical_device *pdev);
+
struct vk_format_description;
uint32_t radv_translate_buffer_dataformat(const struct vk_format_description *desc,
int first_non_void);
const struct radeon_bo_metadata *bo_metadata;
};
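+/* Compute the surface layout of an image. Kept separate from
+ * radv_image_create so the layout can be deferred, e.g. for Android
+ * hardware buffers where it depends on the imported buffer.
+ */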
+VkResult
+radv_image_create_layout(struct radv_device *device,
+ struct radv_image_create_info create_info,
+ struct radv_image *image);
+
VkResult radv_image_create(VkDevice _device,
const struct radv_image_create_info *info,
const VkAllocationCallbacks* alloc,
VkImage *pImage);
+bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
+
VkResult
radv_image_from_gralloc(VkDevice device_h,
const VkImageCreateInfo *base_info,
const VkNativeBufferANDROID *gralloc_info,
const VkAllocationCallbacks *alloc,
VkImage *out_image_h);
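+/* Android hardware buffer (AHB) helpers. */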
+uint64_t
+radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
+ const VkImageUsageFlags vk_usage);
+VkResult
+radv_import_ahb_memory(struct radv_device *device,
+ struct radv_device_memory *mem,
+ unsigned priority,
+ const VkImportAndroidHardwareBufferInfoANDROID *info);
+VkResult
+radv_create_ahb_memory(struct radv_device *device,
+ struct radv_device_memory *mem,
+ unsigned priority,
+ const VkMemoryAllocateInfo *pAllocateInfo);
+
+VkFormat
+radv_select_android_external_format(const void *next, VkFormat default_format);
+
+bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
struct radv_image_view_extra_create_info {
bool disable_compression;
struct radv_subpass_attachment {
uint32_t attachment;
VkImageLayout layout;
+ VkImageLayout stencil_layout;
bool in_render_loop;
};
struct radv_subpass_barrier start_barrier;
uint32_t view_mask;
+
+ VkSampleCountFlagBits color_sample_count;
+ VkSampleCountFlagBits depth_sample_count;
VkSampleCountFlagBits max_sample_count;
};
VkAttachmentLoadOp stencil_load_op;
VkImageLayout initial_layout;
VkImageLayout final_layout;
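+ /* Separate stencil layouts from VK_KHR_separate_depth_stencil_layouts. */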
+ VkImageLayout stencil_initial_layout;
+ VkImageLayout stencil_final_layout;
/* The subpass id in which the attachment will be used first/last. */
uint32_t first_subpass_idx;
uint32_t pipeline_stats_mask;
};
-struct radv_semaphore {
- /* use a winsys sem for non-exportable */
- struct radeon_winsys_sem *sem;
+typedef enum {
+ RADV_SEMAPHORE_NONE,
+ RADV_SEMAPHORE_WINSYS,
+ RADV_SEMAPHORE_SYNCOBJ,
+ RADV_SEMAPHORE_TIMELINE,
+} radv_semaphore_kind;
+
+struct radv_deferred_queue_submission;
+
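+/* A submission waiting for a timeline to reach a specific value before it
+ * can be submitted.
+ */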
+struct radv_timeline_waiter {
+ struct list_head list;
+ struct radv_deferred_queue_submission *submission;
+ uint64_t value;
+};
+
+struct radv_timeline_point {
+ struct list_head list;
+
+ uint64_t value;
uint32_t syncobj;
- uint32_t temp_syncobj;
+
+ /* Separate from the list to accommodate CPU wait being async, as well
+ * as to prevent point deletion during submission. */
+ unsigned wait_count;
+};
+
+struct radv_timeline {
+ /* Using a pthread mutex to be compatible with condition variables. */
+ pthread_mutex_t mutex;
+
+ uint64_t highest_signaled;
+ uint64_t highest_submitted;
+
+ struct list_head points;
+
+ /* Keep free points on hand so we do not have to recreate syncobjs all
+ * the time. */
+ struct list_head free_points;
+
+ /* Submissions that are deferred waiting for a specific value to be
+ * submitted. */
+ struct list_head waiters;
+};
+
+struct radv_semaphore_part {
+ radv_semaphore_kind kind;
+ union {
+ uint32_t syncobj;
+ struct radeon_winsys_sem *ws_sem;
+ struct radv_timeline timeline;
+ };
+};
+
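+/* A semaphore has a permanent payload and an optional temporary payload
+ * (e.g. from an import with VK_SEMAPHORE_IMPORT_TEMPORARY_BIT). When set,
+ * the temporary payload takes precedence.
+ */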
+struct radv_semaphore {
+ struct radv_semaphore_part permanent;
+ struct radv_semaphore_part temporary;
};
void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
};
/* radv_nir_to_llvm.c */
-struct radv_shader_info;
-struct radv_nir_compiler_options;
+struct radv_shader_args;
void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader,
struct radv_shader_binary **rbinary,
- struct radv_shader_info *info,
- const struct radv_nir_compiler_options *option);
+ const struct radv_shader_args *args);
void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
struct radv_shader_binary **rbinary,
- struct radv_shader_info *info,
+ const struct radv_shader_args *args,
struct nir_shader *const *nir,
- int nir_count,
- const struct radv_nir_compiler_options *options);
+ int nir_count);
unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
gl_shader_stage stage,