X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_radeon_winsys.h;h=37576327dfd6c01cc46b04f5fa555fa895e7081d;hb=fa97061a8235b64009d7897ecf20cc81258f3403;hp=5614d8b378dd92c721e56a0c59fe562f400e2984;hpb=fe6d9c0825fafa4f0279651b7b38fae6d725b69f;p=mesa.git diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index 5614d8b378d..37576327dfd 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -29,25 +29,40 @@ #ifndef RADV_RADEON_WINSYS_H #define RADV_RADEON_WINSYS_H +#include <stdio.h> #include <stdint.h> #include <stdbool.h> #include <stdlib.h> -#include "main/macros.h" +#include <string.h> +#include <vulkan/vulkan.h> #include "amd_family.h" +#include "util/u_memory.h" +#include "util/u_math.h" -#define FREE(x) free(x) +struct radeon_info; +struct ac_surf_info; +struct radeon_surf; enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_GTT = 2, RADEON_DOMAIN_VRAM = 4, - RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT + RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT, + RADEON_DOMAIN_GDS = 8, + RADEON_DOMAIN_OA = 16, }; enum radeon_bo_flag { /* bitfield */ RADEON_FLAG_GTT_WC = (1 << 0), RADEON_FLAG_CPU_ACCESS = (1 << 1), RADEON_FLAG_NO_CPU_ACCESS = (1 << 2), - RADEON_FLAG_VIRTUAL = (1 << 3) + RADEON_FLAG_VIRTUAL = (1 << 3), + RADEON_FLAG_VA_UNCACHED = (1 << 4), + RADEON_FLAG_IMPLICIT_SYNC = (1 << 5), + RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6), + RADEON_FLAG_READ_ONLY = (1 << 7), + RADEON_FLAG_32BIT = (1 << 8), + RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9), + RADEON_FLAG_ZERO_VRAM = (1 << 10), }; enum radeon_bo_usage { /* bitfield */ @@ -56,78 +71,36 @@ enum radeon_bo_usage { /* bitfield */ RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE }; -enum ring_type { - RING_GFX = 0, - RING_COMPUTE, - RING_DMA, - RING_UVD, - RING_VCE, - RING_LAST, +enum radeon_ctx_priority { + RADEON_CTX_PRIORITY_INVALID = -1, + RADEON_CTX_PRIORITY_LOW = 0, + RADEON_CTX_PRIORITY_MEDIUM, + RADEON_CTX_PRIORITY_HIGH, +
RADEON_CTX_PRIORITY_REALTIME, }; -struct radeon_winsys_cs { +enum radeon_value_id { + RADEON_ALLOCATED_VRAM, + RADEON_ALLOCATED_VRAM_VIS, + RADEON_ALLOCATED_GTT, + RADEON_TIMESTAMP, + RADEON_NUM_BYTES_MOVED, + RADEON_NUM_EVICTIONS, + RADEON_NUM_VRAM_CPU_PAGE_FAULTS, + RADEON_VRAM_USAGE, + RADEON_VRAM_VIS_USAGE, + RADEON_GTT_USAGE, + RADEON_GPU_TEMPERATURE, + RADEON_CURRENT_SCLK, + RADEON_CURRENT_MCLK, +}; + +struct radeon_cmdbuf { unsigned cdw; /* Number of used dwords. */ unsigned max_dw; /* Maximum number of dwords. */ uint32_t *buf; /* The base pointer of the chunk. */ }; -struct radeon_info { - /* PCI info: domain:bus:dev:func */ - uint32_t pci_domain; - uint32_t pci_bus; - uint32_t pci_dev; - uint32_t pci_func; - - /* Device info. */ - uint32_t pci_id; - enum radeon_family family; - const char *name; - enum chip_class chip_class; - uint32_t gart_page_size; - uint64_t gart_size; - uint64_t vram_size; - uint64_t visible_vram_size; - bool has_dedicated_vram; - bool has_virtual_memory; - bool gfx_ib_pad_with_type2; - bool has_uvd; - uint32_t sdma_rings; - uint32_t compute_rings; - uint32_t vce_fw_version; - uint32_t vce_harvest_config; - uint32_t clock_crystal_freq; /* in kHz */ - - /* Kernel info. */ - uint32_t drm_major; /* version */ - uint32_t drm_minor; - uint32_t drm_patchlevel; - bool has_userptr; - - /* Shader cores. */ - uint32_t r600_max_quad_pipes; /* wave size / 16 */ - uint32_t max_shader_clock; - uint32_t num_good_compute_units; - uint32_t max_se; /* shader engines */ - uint32_t max_sh_per_se; /* shader arrays per shader engine */ - - /* Render backends (color + depth blocks). */ - uint32_t r300_num_gb_pipes; - uint32_t r300_num_z_pipes; - uint32_t r600_gb_backend_map; /* R600 harvest config */ - bool r600_gb_backend_map_valid; - uint32_t r600_num_banks; - uint32_t num_render_backends; - uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */ - uint32_t pipe_interleave_bytes; - uint32_t enabled_rb_mask; /* GCN harvest config */ - - /* Tile modes. 
*/ - uint32_t si_tile_mode_array[32]; - uint32_t cik_macrotile_mode_array[16]; -}; - -#define RADEON_SURF_MAX_LEVEL 32 - #define RADEON_SURF_TYPE_MASK 0xFF #define RADEON_SURF_TYPE_SHIFT 0 #define RADEON_SURF_TYPE_1D 0 @@ -138,92 +111,11 @@ struct radeon_info { #define RADEON_SURF_TYPE_2D_ARRAY 5 #define RADEON_SURF_MODE_MASK 0xFF #define RADEON_SURF_MODE_SHIFT 8 -#define RADEON_SURF_MODE_LINEAR_ALIGNED 1 -#define RADEON_SURF_MODE_1D 2 -#define RADEON_SURF_MODE_2D 3 -#define RADEON_SURF_SCANOUT (1 << 16) -#define RADEON_SURF_ZBUFFER (1 << 17) -#define RADEON_SURF_SBUFFER (1 << 18) -#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER) -#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19) -#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20) -#define RADEON_SURF_FMASK (1 << 21) -#define RADEON_SURF_DISABLE_DCC (1 << 22) -#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23) #define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK) #define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT) #define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT)) -struct radeon_surf_info { - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t samples; - uint32_t array_size; - uint32_t levels; -}; - -struct radeon_surf_level { - uint64_t offset; - uint64_t slice_size; - uint32_t nblk_x; - uint32_t nblk_y; - uint32_t nblk_z; - uint32_t pitch_bytes; - uint32_t mode; - uint64_t dcc_offset; - uint64_t dcc_fast_clear_size; - bool dcc_enabled; -}; - - -/* surface defintions from the winsys */ -struct radeon_surf { - /* These are inputs to the calculator. */ - uint32_t blk_w; - uint32_t blk_h; - uint32_t bpe; - uint32_t flags; - - /* These are return values. Some of them can be set by the caller, but - * they will be treated as hints (e.g. bankw, bankh) and might be - * changed by the calculator. 
- */ - uint64_t bo_size; - uint64_t bo_alignment; - /* This applies to EG and later. */ - uint32_t bankw; - uint32_t bankh; - uint32_t mtilea; - uint32_t tile_split; - uint32_t stencil_tile_split; - uint64_t stencil_offset; - struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL]; - struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL]; - uint32_t tiling_index[RADEON_SURF_MAX_LEVEL]; - uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL]; - uint32_t pipe_config; - uint32_t num_banks; - uint32_t macro_tile_index; - uint32_t micro_tile_mode; /* displayable, thin, depth, rotated */ - - /* Whether the depth miptree or stencil miptree as used by the DB are - * adjusted from their TC compatible form to ensure depth/stencil - * compatibility. If either is true, the corresponding plane cannot be - * sampled from. - */ - bool depth_adjusted; - bool stencil_adjusted; - - uint64_t dcc_size; - uint64_t dcc_alignment; - - uint64_t htile_size; - uint64_t htile_slice_size; - uint64_t htile_alignment; -}; - enum radeon_bo_layout { RADEON_LAYOUT_LINEAR = 0, RADEON_LAYOUT_TILED, @@ -237,16 +129,26 @@ struct radeon_bo_metadata { /* Tiling flags describing the texture layout for display code * and DRI sharing. */ - enum radeon_bo_layout microtile; - enum radeon_bo_layout macrotile; - unsigned pipe_config; - unsigned bankw; - unsigned bankh; - unsigned tile_split; - unsigned mtilea; - unsigned num_banks; - unsigned stride; - bool scanout; + union { + struct { + enum radeon_bo_layout microtile; + enum radeon_bo_layout macrotile; + unsigned pipe_config; + unsigned bankw; + unsigned bankh; + unsigned tile_split; + unsigned mtilea; + unsigned num_banks; + unsigned stride; + bool scanout; + } legacy; + + struct { + /* surface flags */ + unsigned swizzle_mode:5; + bool scanout; + } gfx9; + } u; /* Additional metadata associated with the buffer, in bytes. * The maximum size is 64 * 4. This is opaque for the winsys & kernel. 
@@ -256,9 +158,54 @@ struct radeon_bo_metadata { uint32_t metadata[64]; }; -struct radeon_winsys_bo; struct radeon_winsys_fence; -struct radeon_winsys_sem; +struct radeon_winsys_ctx; + +struct radeon_winsys_bo { + uint64_t va; + bool is_local; + bool vram_no_cpu_access; +}; +struct radv_winsys_sem_counts { + uint32_t syncobj_count; + uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */ + uint32_t sem_count; + uint32_t *syncobj; + struct radeon_winsys_sem **sem; +}; + +struct radv_winsys_sem_info { + bool cs_emit_signal; + bool cs_emit_wait; + struct radv_winsys_sem_counts wait; + struct radv_winsys_sem_counts signal; +}; + +struct radv_winsys_bo_list { + struct radeon_winsys_bo **bos; + unsigned count; +}; + +/* Kernel effectively allows 0-31. This sets some priorities for fixed + * functionality buffers */ +enum { + RADV_BO_PRIORITY_APPLICATION_MAX = 28, + + /* virtual buffers have 0 priority since the priority is not used. */ + RADV_BO_PRIORITY_VIRTUAL = 0, + + /* This should be considerably lower than most of the stuff below, + * but how much lower is hard to say since we don't know application + * assignments. Put it pretty high since it is GTT anyway. 
*/ + RADV_BO_PRIORITY_QUERY_POOL = 29, + + RADV_BO_PRIORITY_DESCRIPTOR = 30, + RADV_BO_PRIORITY_UPLOAD_BUFFER = 30, + RADV_BO_PRIORITY_FENCE = 30, + RADV_BO_PRIORITY_SHADER = 31, + RADV_BO_PRIORITY_SCRATCH = 31, + RADV_BO_PRIORITY_CS = 31, +}; struct radeon_winsys { void (*destroy)(struct radeon_winsys *ws); @@ -266,101 +213,158 @@ struct radeon_winsys { void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info); + uint64_t (*query_value)(struct radeon_winsys *ws, + enum radeon_value_id value); + + bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, + unsigned num_registers, uint32_t *out); + + const char *(*get_chip_name)(struct radeon_winsys *ws); + struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws, uint64_t size, unsigned alignment, enum radeon_bo_domain domain, - enum radeon_bo_flag flags); + enum radeon_bo_flag flags, + unsigned priority); void (*buffer_destroy)(struct radeon_winsys_bo *bo); void *(*buffer_map)(struct radeon_winsys_bo *bo); + struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws, + void *pointer, + uint64_t size, + unsigned priority); + struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, int fd, - unsigned *stride, unsigned *offset); + unsigned priority, + uint64_t *alloc_size); bool (*buffer_get_fd)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, int *fd); - void (*buffer_unmap)(struct radeon_winsys_bo *bo); + bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd, + enum radeon_bo_domain *domains, + enum radeon_bo_flag *flags); - uint64_t (*buffer_get_va)(struct radeon_winsys_bo *bo); + void (*buffer_unmap)(struct radeon_winsys_bo *bo); void (*buffer_set_metadata)(struct radeon_winsys_bo *bo, struct radeon_bo_metadata *md); + void (*buffer_get_metadata)(struct radeon_winsys_bo *bo, + struct radeon_bo_metadata *md); - void (*buffer_virtual_bind)(struct radeon_winsys_bo *parent, - uint64_t offset, uint64_t size, - struct radeon_winsys_bo *bo, 
uint64_t bo_offset); - struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws); + VkResult (*buffer_virtual_bind)(struct radeon_winsys_bo *parent, + uint64_t offset, uint64_t size, + struct radeon_winsys_bo *bo, uint64_t bo_offset); + VkResult (*ctx_create)(struct radeon_winsys *ws, + enum radeon_ctx_priority priority, + struct radeon_winsys_ctx **ctx); void (*ctx_destroy)(struct radeon_winsys_ctx *ctx); bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, int ring_index); - struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, + struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type); - void (*cs_destroy)(struct radeon_winsys_cs *cs); + void (*cs_destroy)(struct radeon_cmdbuf *cs); - void (*cs_reset)(struct radeon_winsys_cs *cs); + void (*cs_reset)(struct radeon_cmdbuf *cs); - bool (*cs_finalize)(struct radeon_winsys_cs *cs); + VkResult (*cs_finalize)(struct radeon_cmdbuf *cs); - void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size); + void (*cs_grow)(struct radeon_cmdbuf * cs, size_t min_size); - int (*cs_submit)(struct radeon_winsys_ctx *ctx, - int queue_index, - struct radeon_winsys_cs **cs_array, - unsigned cs_count, - struct radeon_winsys_cs *initial_preamble_cs, - struct radeon_winsys_cs *continue_preamble_cs, - struct radeon_winsys_sem **wait_sem, - unsigned wait_sem_count, - struct radeon_winsys_sem **signal_sem, - unsigned signal_sem_count, - bool can_patch, - struct radeon_winsys_fence *fence); + VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, + int queue_index, + struct radeon_cmdbuf **cs_array, + unsigned cs_count, + struct radeon_cmdbuf *initial_preamble_cs, + struct radeon_cmdbuf *continue_preamble_cs, + struct radv_winsys_sem_info *sem_info, + const struct radv_winsys_bo_list *bo_list, /* optional */ + bool can_patch, + struct radeon_winsys_fence *fence); - void (*cs_add_buffer)(struct radeon_winsys_cs *cs, - struct radeon_winsys_bo *bo, - uint8_t priority); + 
void (*cs_add_buffer)(struct radeon_cmdbuf *cs, + struct radeon_winsys_bo *bo); - void (*cs_execute_secondary)(struct radeon_winsys_cs *parent, - struct radeon_winsys_cs *child); + void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, + struct radeon_cmdbuf *child); - void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id); + void (*cs_dump)(struct radeon_cmdbuf *cs, FILE* file, const int *trace_ids, int trace_id_count); int (*surface_init)(struct radeon_winsys *ws, - const struct radeon_surf_info *surf_info, - struct radeon_surf *surf); - - int (*surface_best)(struct radeon_winsys *ws, + const struct ac_surf_info *surf_info, struct radeon_surf *surf); struct radeon_winsys_fence *(*create_fence)(); void (*destroy_fence)(struct radeon_winsys_fence *fence); + void (*reset_fence)(struct radeon_winsys_fence *fence); + void (*signal_fence)(struct radeon_winsys_fence *fence); + bool (*is_fence_waitable)(struct radeon_winsys_fence *fence); bool (*fence_wait)(struct radeon_winsys *ws, struct radeon_winsys_fence *fence, bool absolute, uint64_t timeout); + bool (*fences_wait)(struct radeon_winsys *ws, + struct radeon_winsys_fence *const *fences, + uint32_t fence_count, + bool wait_all, + uint64_t timeout); + /* old semaphores - non shareable */ struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws); void (*destroy_sem)(struct radeon_winsys_sem *sem); + /* new shareable sync objects */ + int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled, + uint32_t *handle); + void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle); + + void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle); + void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle); + bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count, + bool wait_all, uint64_t timeout); + + int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd); + int (*import_syncobj)(struct radeon_winsys *ws, 
int fd, uint32_t *syncobj); + + int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd); + + /* Note that this, unlike the normal import, uses an existing syncobj. */ + int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd); + }; -static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value) +static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value) { cs->buf[cs->cdw++] = value; } -static inline void radeon_emit_array(struct radeon_winsys_cs *cs, +static inline void radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values, unsigned count) { memcpy(cs->buf + cs->cdw, values, count * 4); cs->cdw += count; } +static inline uint64_t radv_buffer_get_va(struct radeon_winsys_bo *bo) +{ + return bo->va; +} + +static inline void radv_cs_add_buffer(struct radeon_winsys *ws, + struct radeon_cmdbuf *cs, + struct radeon_winsys_bo *bo) +{ + if (bo->is_local) + return; + + ws->cs_add_buffer(cs, bo); +} + #endif /* RADV_RADEON_WINSYS_H */