radv/winsys: Add binary syncobj ABI changes for timeline semaphores.
[mesa.git] / src / amd / vulkan / radv_radeon_winsys.h
index a0b5092e300cefc0d4fbc657e1adac824f037fc1..37576327dfd6c01cc46b04f5fa555fa895e7081d 100644 (file)
 #ifndef RADV_RADEON_WINSYS_H
 #define RADV_RADEON_WINSYS_H
 
+#include <stdio.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
-#include "main/macros.h"
+#include <string.h>
+#include <vulkan/vulkan.h>
 #include "amd_family.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
 
-#define FREE(x) free(x)
+struct radeon_info;
+struct ac_surf_info;
+struct radeon_surf;
 
 enum radeon_bo_domain { /* bitfield */
        RADEON_DOMAIN_GTT  = 2,
        RADEON_DOMAIN_VRAM = 4,
-       RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
+       RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, /* both bits set, i.e. 6 */
+       RADEON_DOMAIN_GDS = 8, /* presumably the on-chip Global Data Share -- TODO confirm */
+       RADEON_DOMAIN_OA = 16, /* presumably "ordered append" -- TODO confirm */
 };
 
 enum radeon_bo_flag { /* bitfield */
        RADEON_FLAG_GTT_WC =        (1 << 0),
        RADEON_FLAG_CPU_ACCESS =    (1 << 1),
        RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+       RADEON_FLAG_VIRTUAL =       (1 << 3), /* presumably marks a BO usable as the parent of buffer_virtual_bind -- confirm */
+       RADEON_FLAG_VA_UNCACHED =   (1 << 4),
+       RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
+       RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
+       RADEON_FLAG_READ_ONLY =     (1 << 7),
+       RADEON_FLAG_32BIT =         (1 << 8), /* presumably requests an address in the low 32-bit range -- confirm */
+       RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
+       RADEON_FLAG_ZERO_VRAM = (1 << 10), /* highest bit in use; flags occupy bits 0-10 without gaps */
 };
 
 enum radeon_bo_usage { /* bitfield */
@@ -55,78 +71,36 @@ enum radeon_bo_usage { /* bitfield */
        RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE
 };
 
-enum ring_type {
-       RING_GFX = 0,
-       RING_COMPUTE,
-       RING_DMA,
-       RING_UVD,
-       RING_VCE,
-       RING_LAST,
+enum radeon_ctx_priority { /* type of the priority argument of ctx_create */
+       RADEON_CTX_PRIORITY_INVALID = -1, /* negative, so it compares below every real level */
+       RADEON_CTX_PRIORITY_LOW = 0,
+       RADEON_CTX_PRIORITY_MEDIUM, /* 1 */
+       RADEON_CTX_PRIORITY_HIGH, /* 2 */
+       RADEON_CTX_PRIORITY_REALTIME, /* 3 */
 };
 
-struct radeon_winsys_cs {
+enum radeon_value_id { /* selector passed to the query_value winsys hook */
+       RADEON_ALLOCATED_VRAM, /* 0; the remaining ids count upward implicitly */
+       RADEON_ALLOCATED_VRAM_VIS,
+       RADEON_ALLOCATED_GTT,
+       RADEON_TIMESTAMP,
+       RADEON_NUM_BYTES_MOVED,
+       RADEON_NUM_EVICTIONS,
+       RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
+       RADEON_VRAM_USAGE,
+       RADEON_VRAM_VIS_USAGE,
+       RADEON_GTT_USAGE,
+       RADEON_GPU_TEMPERATURE,
+       RADEON_CURRENT_SCLK,
+       RADEON_CURRENT_MCLK, /* 12, the last id */
+};
+
+struct radeon_cmdbuf { /* dword buffer that radeon_emit() and radeon_emit_array() append to */
        unsigned cdw;  /* Number of used dwords. */
        unsigned max_dw; /* Maximum number of dwords. */
        uint32_t *buf; /* The base pointer of the chunk. */
 };
 
-struct radeon_info {
-       /* PCI info: domain:bus:dev:func */
-       uint32_t                    pci_domain;
-       uint32_t                    pci_bus;
-       uint32_t                    pci_dev;
-       uint32_t                    pci_func;
-
-       /* Device info. */
-       uint32_t                    pci_id;
-       enum radeon_family          family;
-       const char                  *name;
-       enum chip_class             chip_class;
-       uint32_t                    gart_page_size;
-       uint64_t                    gart_size;
-       uint64_t                    vram_size;
-       uint64_t                    visible_vram_size;
-       bool                        has_dedicated_vram;
-       bool                     has_virtual_memory;
-       bool                        gfx_ib_pad_with_type2;
-       bool                     has_uvd;
-       uint32_t                    sdma_rings;
-       uint32_t                    compute_rings;
-       uint32_t                    vce_fw_version;
-       uint32_t                    vce_harvest_config;
-       uint32_t                    clock_crystal_freq;
-
-       /* Kernel info. */
-       uint32_t                    drm_major; /* version */
-       uint32_t                    drm_minor;
-       uint32_t                    drm_patchlevel;
-       bool                     has_userptr;
-
-       /* Shader cores. */
-       uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
-       uint32_t                    max_shader_clock;
-       uint32_t                    num_good_compute_units;
-       uint32_t                    max_se; /* shader engines */
-       uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
-
-       /* Render backends (color + depth blocks). */
-       uint32_t                    r300_num_gb_pipes;
-       uint32_t                    r300_num_z_pipes;
-       uint32_t                    r600_gb_backend_map; /* R600 harvest config */
-       bool                     r600_gb_backend_map_valid;
-       uint32_t                    r600_num_banks;
-       uint32_t                    num_render_backends;
-       uint32_t                    num_tile_pipes; /* pipe count from PIPE_CONFIG */
-       uint32_t                    pipe_interleave_bytes;
-       uint32_t                    enabled_rb_mask; /* GCN harvest config */
-
-       /* Tile modes. */
-       uint32_t                    si_tile_mode_array[32];
-       uint32_t                    cik_macrotile_mode_array[16];
-};
-
-#define RADEON_SURF_MAX_LEVEL                   32
-
 #define RADEON_SURF_TYPE_MASK                   0xFF
 #define RADEON_SURF_TYPE_SHIFT                  0
 #define     RADEON_SURF_TYPE_1D                     0
@@ -137,88 +111,11 @@ struct radeon_info {
 #define     RADEON_SURF_TYPE_2D_ARRAY               5
 #define RADEON_SURF_MODE_MASK                   0xFF
 #define RADEON_SURF_MODE_SHIFT                  8
-#define     RADEON_SURF_MODE_LINEAR_ALIGNED         1
-#define     RADEON_SURF_MODE_1D                     2
-#define     RADEON_SURF_MODE_2D                     3
-#define RADEON_SURF_SCANOUT                     (1 << 16)
-#define RADEON_SURF_ZBUFFER                     (1 << 17)
-#define RADEON_SURF_SBUFFER                     (1 << 18)
-#define RADEON_SURF_Z_OR_SBUFFER                (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
-#define RADEON_SURF_HAS_SBUFFER_MIPTREE         (1 << 19)
-#define RADEON_SURF_HAS_TILE_MODE_INDEX         (1 << 20)
-#define RADEON_SURF_FMASK                       (1 << 21)
-#define RADEON_SURF_DISABLE_DCC                 (1 << 22)
 
 #define RADEON_SURF_GET(v, field)   (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
 #define RADEON_SURF_SET(v, field)   (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
 #define RADEON_SURF_CLR(v, field)   ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
 
-struct radeon_surf_level {
-       uint64_t                    offset;
-       uint64_t                    slice_size;
-       uint32_t                    npix_x;
-       uint32_t                    npix_y;
-       uint32_t                    npix_z;
-       uint32_t                    nblk_x;
-       uint32_t                    nblk_y;
-       uint32_t                    nblk_z;
-       uint32_t                    pitch_bytes;
-       uint32_t                    mode;
-       uint64_t                    dcc_offset;
-       uint64_t                    dcc_fast_clear_size;
-       bool                        dcc_enabled;
-};
-
-
-/* surface defintions from the winsys */
-struct radeon_surf {
-       /* These are inputs to the calculator. */
-       uint32_t                    npix_x;
-       uint32_t                    npix_y;
-       uint32_t                    npix_z;
-       uint32_t                    blk_w;
-       uint32_t                    blk_h;
-       uint32_t                    blk_d;
-       uint32_t                    array_size;
-       uint32_t                    last_level;
-       uint32_t                    bpe;
-       uint32_t                    nsamples;
-       uint32_t                    flags;
-
-       /* These are return values. Some of them can be set by the caller, but
-        * they will be treated as hints (e.g. bankw, bankh) and might be
-        * changed by the calculator.
-        */
-       uint64_t                    bo_size;
-       uint64_t                    bo_alignment;
-       /* This applies to EG and later. */
-       uint32_t                    bankw;
-       uint32_t                    bankh;
-       uint32_t                    mtilea;
-       uint32_t                    tile_split;
-       uint32_t                    stencil_tile_split;
-       uint64_t                    stencil_offset;
-       struct radeon_surf_level    level[RADEON_SURF_MAX_LEVEL];
-       struct radeon_surf_level    stencil_level[RADEON_SURF_MAX_LEVEL];
-       uint32_t                    tiling_index[RADEON_SURF_MAX_LEVEL];
-       uint32_t                    stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
-       uint32_t                    pipe_config;
-       uint32_t                    num_banks;
-       uint32_t                    macro_tile_index;
-       uint32_t                    micro_tile_mode; /* displayable, thin, depth, rotated */
-
-       /* Whether the depth miptree or stencil miptree as used by the DB are
-        * adjusted from their TC compatible form to ensure depth/stencil
-        * compatibility. If either is true, the corresponding plane cannot be
-        * sampled from.
-        */
-       bool                        depth_adjusted;
-       bool                        stencil_adjusted;
-
-       uint64_t                    dcc_size;
-       uint64_t                    dcc_alignment;
-};
-
 enum radeon_bo_layout {
        RADEON_LAYOUT_LINEAR = 0,
        RADEON_LAYOUT_TILED,
@@ -232,16 +129,26 @@ struct radeon_bo_metadata {
        /* Tiling flags describing the texture layout for display code
         * and DRI sharing.
         */
-       enum radeon_bo_layout   microtile;
-       enum radeon_bo_layout   macrotile;
-       unsigned                pipe_config;
-       unsigned                bankw;
-       unsigned                bankh;
-       unsigned                tile_split;
-       unsigned                mtilea;
-       unsigned                num_banks;
-       unsigned                stride;
-       bool                    scanout;
+       union {
+               struct {
+                       enum radeon_bo_layout   microtile;
+                       enum radeon_bo_layout   macrotile;
+                       unsigned                pipe_config;
+                       unsigned                bankw;
+                       unsigned                bankh;
+                       unsigned                tile_split;
+                       unsigned                mtilea;
+                       unsigned                num_banks;
+                       unsigned                stride;
+                       bool                    scanout;
+               } legacy;
+
+               struct {
+                       /* surface flags */
+                       unsigned swizzle_mode:5;
+                       bool scanout;
+               } gfx9;
+       } u;
 
        /* Additional metadata associated with the buffer, in bytes.
         * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
@@ -251,9 +158,54 @@ struct radeon_bo_metadata {
        uint32_t                metadata[64];
 };
 
-struct radeon_winsys_bo;
 struct radeon_winsys_fence;
-struct radeon_winsys_sem;
+struct radeon_winsys_ctx;
+
+struct radeon_winsys_bo { /* buffer object; va and is_local are read by the inline helpers at the end of this header */
+       uint64_t va; /* the value radv_buffer_get_va() returns */
+       bool is_local; /* when true, radv_cs_add_buffer() returns without calling ws->cs_add_buffer() */
+       bool vram_no_cpu_access; /* presumably set for VRAM BOs created with RADEON_FLAG_NO_CPU_ACCESS -- confirm */
+};
+struct radv_winsys_sem_counts { /* one side (wait or signal) of a radv_winsys_sem_info */
+       uint32_t syncobj_count; /* presumably the number of entries in syncobj -- confirm */
+       uint32_t syncobj_reset_count; /* wait side only. NOTE(review): this is a count, not a flag -- presumably how many of the syncobjs are reset; confirm */
+       uint32_t sem_count; /* presumably the number of entries in sem -- confirm */
+       uint32_t *syncobj; /* uint32_t handles, the same type create_syncobj writes out */
+       struct radeon_winsys_sem **sem; /* pointers of the type create_sem returns */
+};
+
+struct radv_winsys_sem_info { /* type of the sem_info argument of cs_submit */
+       bool cs_emit_signal;
+       bool cs_emit_wait;
+       struct radv_winsys_sem_counts wait; /* presumably waited on before the submission runs -- confirm */
+       struct radv_winsys_sem_counts signal; /* presumably signalled by the submission -- confirm */
+};
+
+struct radv_winsys_bo_list { /* type of the optional bo_list argument of cs_submit */
+       struct radeon_winsys_bo **bos;
+       unsigned count; /* presumably the number of entries in bos -- confirm */
+};
+
+/* The kernel effectively allows priorities 0-31. This enum assigns priorities
+ * to the fixed-functionality buffers. */
+enum {
+       RADV_BO_PRIORITY_APPLICATION_MAX = 28, /* every non-virtual entry below is above this value */
+
+       /* virtual buffers have 0 priority since the priority is not used. */
+       RADV_BO_PRIORITY_VIRTUAL = 0,
+
+       /* This should be considerably lower than most of the stuff below,
+        * but how much lower is hard to say since we don't know application
+        * assignments. Put it pretty high since it is GTT anyway. */
+       RADV_BO_PRIORITY_QUERY_POOL = 29,
+
+       RADV_BO_PRIORITY_DESCRIPTOR = 30, /* descriptor, upload and fence buffers all share 30 */
+       RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
+       RADV_BO_PRIORITY_FENCE = 30,
+       RADV_BO_PRIORITY_SHADER = 31, /* shader, scratch and CS buffers all share 31, the top of the 0-31 range */
+       RADV_BO_PRIORITY_SCRATCH = 31,
+       RADV_BO_PRIORITY_CS = 31,
+};
 
 struct radeon_winsys {
        void (*destroy)(struct radeon_winsys *ws);
@@ -261,94 +213,158 @@ struct radeon_winsys {
        void (*query_info)(struct radeon_winsys *ws,
                           struct radeon_info *info);
 
+       uint64_t (*query_value)(struct radeon_winsys *ws,
+                               enum radeon_value_id value); /* returns the 64-bit value selected by the id */
+
+       bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset,
+                              unsigned num_registers, uint32_t *out); /* presumably writes num_registers dwords to out -- confirm */
+
+       const char *(*get_chip_name)(struct radeon_winsys *ws);
+
        struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws,
                                                  uint64_t size,
                                                  unsigned alignment,
                                                  enum radeon_bo_domain domain,
-                                                 enum radeon_bo_flag flags);
+                                                 enum radeon_bo_flag flags,
+                                                 unsigned priority); /* presumably a RADV_BO_PRIORITY_* value (0-31) -- confirm */
 
        void (*buffer_destroy)(struct radeon_winsys_bo *bo);
        void *(*buffer_map)(struct radeon_winsys_bo *bo);
 
+       struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws,
+                                                   void *pointer,
+                                                   uint64_t size,
+                                                   unsigned priority);
+
        struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws,
                                                   int fd,
-                                                  unsigned *stride, unsigned *offset);
+                                                  unsigned priority,
+                                                  uint64_t *alloc_size); /* non-const pointer; presumably receives the imported size -- confirm */
 
        bool (*buffer_get_fd)(struct radeon_winsys *ws,
                              struct radeon_winsys_bo *bo,
                              int *fd);
 
-       void (*buffer_unmap)(struct radeon_winsys_bo *bo);
+       bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
+                                        enum radeon_bo_domain *domains,
+                                        enum radeon_bo_flag *flags); /* domains/flags are non-const; presumably filled in on success -- confirm */
 
-       uint64_t (*buffer_get_va)(struct radeon_winsys_bo *bo);
+       void (*buffer_unmap)(struct radeon_winsys_bo *bo);
 
        void (*buffer_set_metadata)(struct radeon_winsys_bo *bo,
                                    struct radeon_bo_metadata *md);
-       struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
+       void (*buffer_get_metadata)(struct radeon_winsys_bo *bo,
+                                   struct radeon_bo_metadata *md); /* same parameter list as buffer_set_metadata */
+
+       VkResult (*buffer_virtual_bind)(struct radeon_winsys_bo *parent,
+                                       uint64_t offset, uint64_t size,
+                                       struct radeon_winsys_bo *bo, uint64_t bo_offset); /* presumably binds size bytes of bo at bo_offset to offset within parent -- confirm */
+       VkResult (*ctx_create)(struct radeon_winsys *ws,
+                              enum radeon_ctx_priority priority,
+                              struct radeon_winsys_ctx **ctx); /* the context now comes back through *ctx; the return value is a VkResult */
        void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
 
        bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
                              enum ring_type ring_type, int ring_index);
 
-       struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
+       struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws,
                                              enum ring_type ring_type);
 
-       void (*cs_destroy)(struct radeon_winsys_cs *cs);
+       void (*cs_destroy)(struct radeon_cmdbuf *cs);
 
-       void (*cs_reset)(struct radeon_winsys_cs *cs);
+       void (*cs_reset)(struct radeon_cmdbuf *cs);
 
-       bool (*cs_finalize)(struct radeon_winsys_cs *cs);
+       VkResult (*cs_finalize)(struct radeon_cmdbuf *cs); /* result type changes from bool to VkResult */
 
-       void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size);
+       void (*cs_grow)(struct radeon_cmdbuf * cs, size_t min_size);
 
-       int (*cs_submit)(struct radeon_winsys_ctx *ctx,
-                        int queue_index,
-                        struct radeon_winsys_cs **cs_array,
-                        unsigned cs_count,
-                        struct radeon_winsys_sem **wait_sem,
-                        unsigned wait_sem_count,
-                        struct radeon_winsys_sem **signal_sem,
-                        unsigned signal_sem_count,
-                        bool can_patch,
-                        struct radeon_winsys_fence *fence);
+       VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx,
+                             int queue_index,
+                             struct radeon_cmdbuf **cs_array,
+                             unsigned cs_count,
+                             struct radeon_cmdbuf *initial_preamble_cs,
+                             struct radeon_cmdbuf *continue_preamble_cs,
+                             struct radv_winsys_sem_info *sem_info, /* takes the place of the four wait/signal semaphore parameters */
+                             const struct radv_winsys_bo_list *bo_list, /* optional */
+                             bool can_patch,
+                             struct radeon_winsys_fence *fence);
 
-       void (*cs_add_buffer)(struct radeon_winsys_cs *cs,
-                             struct radeon_winsys_bo *bo,
-                             uint8_t priority);
+       void (*cs_add_buffer)(struct radeon_cmdbuf *cs,
+                             struct radeon_winsys_bo *bo); /* the priority parameter is gone; priority is passed to the buffer_create/buffer_from_* hooks instead */
 
-       void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
-                                   struct radeon_winsys_cs *child);
+       void (*cs_execute_secondary)(struct radeon_cmdbuf *parent,
+                                   struct radeon_cmdbuf *child);
 
-       void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
+       void (*cs_dump)(struct radeon_cmdbuf *cs, FILE* file, const int *trace_ids, int trace_id_count); /* presumably trace_ids holds trace_id_count entries -- confirm */
 
        int (*surface_init)(struct radeon_winsys *ws,
-                           struct radeon_surf *surf);
-
-       int (*surface_best)(struct radeon_winsys *ws,
+                           const struct ac_surf_info *surf_info,
                            struct radeon_surf *surf);
 
        struct radeon_winsys_fence *(*create_fence)();
        void (*destroy_fence)(struct radeon_winsys_fence *fence);
+       void (*reset_fence)(struct radeon_winsys_fence *fence);
+       void (*signal_fence)(struct radeon_winsys_fence *fence);
+       bool (*is_fence_waitable)(struct radeon_winsys_fence *fence);
        bool (*fence_wait)(struct radeon_winsys *ws,
                           struct radeon_winsys_fence *fence,
                           bool absolute,
                           uint64_t timeout);
+       bool (*fences_wait)(struct radeon_winsys *ws,
+                           struct radeon_winsys_fence *const *fences,
+                           uint32_t fence_count,
+                           bool wait_all, /* presumably all-versus-any semantics -- confirm */
+                           uint64_t timeout);
 
+       /* old semaphores - non shareable */
        struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
        void (*destroy_sem)(struct radeon_winsys_sem *sem);
 
+       /* new shareable sync objects */
+       int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled,
+                             uint32_t *handle); /* presumably writes the new handle to *handle; int return convention not visible here -- confirm */
+       void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+
+       void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+       void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+       bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count,
+                            bool wait_all, uint64_t timeout);
+
+       int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+       int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);
+
+       int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+
+       /* Note that this, unlike the normal import, uses an existing syncobj. */
+       int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd); /* syncobj is passed by value here, whereas import_syncobj takes uint32_t * */
+
 };
 
-static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
+static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value) /* stores value at buf[cdw] and post-increments cdw; cdw is not checked against max_dw here */
 {
        cs->buf[cs->cdw++] = value;
 }
 
-static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
+static inline void radeon_emit_array(struct radeon_cmdbuf *cs, /* copies count dwords (count * 4 bytes) to buf + cdw, then adds count to cdw; no max_dw check here */
                                     const uint32_t *values, unsigned count)
 {
        memcpy(cs->buf + cs->cdw, values, count * 4);
        cs->cdw += count;
 }
 
+static inline uint64_t radv_buffer_get_va(struct radeon_winsys_bo *bo) /* plain field read; the buffer_get_va winsys hook is removed by this same diff */
+{
+       return bo->va;
+}
+
+static inline void radv_cs_add_buffer(struct radeon_winsys *ws, /* forwards bo to ws->cs_add_buffer unless bo->is_local is set */
+                                     struct radeon_cmdbuf *cs,
+                                     struct radeon_winsys_bo *bo)
+{
+       if (bo->is_local) /* local BOs skip the winsys call entirely */
+               return;
+
+       ws->cs_add_buffer(cs, bo);
+}
+
 #endif /* RADV_RADEON_WINSYS_H */