X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeon%2Fr600_pipe_common.h;h=952fb77a4532b6439bd6b51f1efd9cce883fc9d7;hb=41e053954d326c53c3970575b2e09deb1f49dbb8;hp=e340e6f7f1e126dcee39071860fe715a2567c412;hpb=a704f1924765fb1cac61cdb9d6e84ad2cb5c7f9c;p=mesa.git diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index e340e6f7f1e..952fb77a453 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -34,14 +34,18 @@ #include +#include "amd/common/ac_binary.h" + #include "radeon/radeon_winsys.h" +#include "util/disk_cache.h" #include "util/u_blitter.h" #include "util/list.h" #include "util/u_range.h" #include "util/slab.h" #include "util/u_suballoc.h" #include "util/u_transfer.h" +#include "util/u_threaded_context.h" #define ATI_VENDOR_ID 0x1002 @@ -49,6 +53,7 @@ #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) +#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) /* Pipeline & streamout query controls. */ @@ -60,12 +65,12 @@ #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX /* Debug flags. */ -/* logging */ +/* logging and features */ #define DBG_TEX (1 << 0) -/* gap - reuse */ +#define DBG_NIR (1 << 1) #define DBG_COMPUTE (1 << 2) #define DBG_VM (1 << 3) -/* gap - reuse */ +#define DBG_CE (1 << 4) /* shader logging */ #define DBG_FS (1 << 5) #define DBG_VS (1 << 6) @@ -80,29 +85,33 @@ #define DBG_PREOPT_IR (1 << 15) #define DBG_CHECK_IR (1 << 16) #define DBG_NO_OPT_VARIANT (1 << 17) +#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18) /* gaps */ #define DBG_TEST_DMA (1 << 20) /* Bits 21-31 are reserved for the r600g driver. */ /* features */ -#define DBG_NO_ASYNC_DMA (1llu << 32) -#define DBG_NO_HYPERZ (1llu << 33) -#define DBG_NO_DISCARD_RANGE (1llu << 34) -#define DBG_NO_2D_TILING (1llu << 35) -#define DBG_NO_TILING (1llu << 36) -#define DBG_SWITCH_ON_EOP (1llu << 37) -#define DBG_FORCE_DMA (1llu << 38) -#define DBG_PRECOMPILE (1llu << 39) -#define DBG_INFO (1llu << 40) -#define DBG_NO_WC (1llu << 41) -#define DBG_CHECK_VM (1llu << 42) -#define DBG_NO_DCC (1llu << 43) -#define DBG_NO_DCC_CLEAR (1llu << 44) -#define DBG_NO_RB_PLUS (1llu << 45) -#define DBG_SI_SCHED (1llu << 46) -#define DBG_MONOLITHIC_SHADERS (1llu << 47) -#define DBG_NO_CE (1llu << 48) -#define DBG_UNSAFE_MATH (1llu << 49) -#define DBG_NO_DCC_FB (1llu << 50) +#define DBG_NO_ASYNC_DMA (1ull << 32) +#define DBG_NO_HYPERZ (1ull << 33) +#define DBG_NO_DISCARD_RANGE (1ull << 34) +#define DBG_NO_2D_TILING (1ull << 35) +#define DBG_NO_TILING (1ull << 36) +#define DBG_SWITCH_ON_EOP (1ull << 37) +#define DBG_FORCE_DMA (1ull << 38) +#define DBG_PRECOMPILE (1ull << 39) +#define DBG_INFO (1ull << 40) +#define DBG_NO_WC (1ull << 41) +#define DBG_CHECK_VM (1ull << 42) +#define DBG_NO_DCC (1ull << 43) +#define DBG_NO_DCC_CLEAR (1ull << 44) +#define DBG_NO_RB_PLUS (1ull << 45) +#define DBG_SI_SCHED (1ull << 46) +#define DBG_MONOLITHIC_SHADERS (1ull << 47) +#define DBG_NO_CE (1ull << 48) +#define DBG_UNSAFE_MATH (1ull << 49) +#define DBG_NO_DCC_FB (1ull << 50) +#define DBG_TEST_VMFAULT_CP (1ull << 51) +#define DBG_TEST_VMFAULT_SDMA (1ull << 52) +#define DBG_TEST_VMFAULT_SHADER (1ull << 53) #define R600_MAP_BUFFER_ALIGNMENT 64 #define R600_MAX_VIEWPORTS 16 @@ -126,51 +135,14 @@ struct r600_perfcounters; struct tgsi_shader_info; struct r600_qbo_state; -struct radeon_shader_reloc { - char name[32]; - uint64_t offset; -}; - -struct radeon_shader_binary { - /** Shader code */ - unsigned char *code; - unsigned code_size; - - /** Config/Context register state that accompanies this shader. - * This is a stream of dword pairs. First dword contains the - * register address, the second dword contains the value.*/ - unsigned char *config; - unsigned config_size; - - /** The number of bytes of config information for each global symbol. - */ - unsigned config_size_per_symbol; - - /** Constant data accessed by the shader. This will be uploaded - * into a constant buffer. */ - unsigned char *rodata; - unsigned rodata_size; - - /** List of symbol offsets for the shader */ - uint64_t *global_symbol_offsets; - unsigned global_symbol_count; - - struct radeon_shader_reloc *relocs; - unsigned reloc_count; - - /** Disassembled shader in a string. */ - char *disasm_string; - char *llvm_ir_string; -}; - -void radeon_shader_binary_init(struct radeon_shader_binary *b); -void radeon_shader_binary_clean(struct radeon_shader_binary *b); +void radeon_shader_binary_init(struct ac_shader_binary *b); +void radeon_shader_binary_clean(struct ac_shader_binary *b); /* Only 32-bit buffer allocations are supported, gallium doesn't support more * at the moment. */ struct r600_resource { - struct u_resource b; + struct threaded_resource b; /* Winsys objects. */ struct pb_buffer *buf; @@ -209,12 +181,15 @@ struct r600_resource { bool TC_L2_dirty; /* Whether the resource has been exported via resource_get_handle. */ - bool is_shared; unsigned external_usage; /* PIPE_HANDLE_USAGE_* */ + + /* Whether this resource is referenced by bindless handles. */ + bool texture_handle_allocated; + bool image_handle_allocated; }; struct r600_transfer { - struct pipe_transfer transfer; + struct threaded_transfer b; struct r600_resource *staging; unsigned offset; }; @@ -227,6 +202,7 @@ struct r600_fmask_info { unsigned bank_height; unsigned slice_tile_max; unsigned tile_mode_index; + unsigned tile_swizzle; }; struct r600_cmask_info { @@ -234,7 +210,7 @@ struct r600_cmask_info { uint64_t size; unsigned alignment; unsigned slice_tile_max; - unsigned base_address_reg; + uint64_t base_address_reg; }; struct r600_texture { @@ -262,7 +238,7 @@ struct r600_texture { unsigned last_msaa_resolve_target_micro_mode; /* Depth buffer compression and fast clear. */ - struct r600_resource *htile_buffer; + uint64_t htile_offset; bool tc_compatible_htile; bool depth_cleared; /* if it was cleared at least once */ float depth_clear_value; @@ -308,6 +284,10 @@ struct r600_texture { struct r600_surface { struct pipe_surface base; + /* These can vary with block-compressed textures. */ + unsigned width0; + unsigned height0; + bool color_initialized; bool depth_initialized; @@ -315,6 +295,8 @@ struct r600_surface { bool alphatest_bypass; bool export_16bpc; bool color_is_int8; + bool color_is_int10; + bool dcc_incompatible; /* Color registers. */ unsigned cb_color_info; @@ -325,6 +307,7 @@ struct r600_surface { unsigned cb_color_pitch; /* EG and later */ unsigned cb_color_slice; /* EG and later */ unsigned cb_color_attrib; /* EG and later */ + unsigned cb_color_attrib2; /* GFX9 and later */ unsigned cb_dcc_control; /* VI and later */ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ unsigned cb_color_fmask_slice; /* EG and later */ @@ -338,33 +321,70 @@ struct r600_surface { struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */ /* DB registers. */ + uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */ + uint64_t db_stencil_base; /* EG and later */ + uint64_t db_htile_data_base; unsigned db_depth_info; /* R600 only, then SI and later */ unsigned db_z_info; /* EG and later */ - unsigned db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */ + unsigned db_z_info2; /* GFX9+ */ unsigned db_depth_view; unsigned db_depth_size; unsigned db_depth_slice; /* EG and later */ - unsigned db_stencil_base; /* EG and later */ unsigned db_stencil_info; /* EG and later */ + unsigned db_stencil_info2; /* GFX9+ */ unsigned db_prefetch_limit; /* R600 only */ unsigned db_htile_surface; - unsigned db_htile_data_base; unsigned db_preload_control; /* EG and later */ }; -struct r600_grbm_counter { +struct r600_mmio_counter { unsigned busy; unsigned idle; }; -union r600_grbm_counters { +union r600_mmio_counters { struct { - struct r600_grbm_counter spi; - struct r600_grbm_counter gui; + /* For global GPU load including SDMA. */ + struct r600_mmio_counter gpu; + + /* GRBM_STATUS */ + struct r600_mmio_counter spi; + struct r600_mmio_counter gui; + struct r600_mmio_counter ta; + struct r600_mmio_counter gds; + struct r600_mmio_counter vgt; + struct r600_mmio_counter ia; + struct r600_mmio_counter sx; + struct r600_mmio_counter wd; + struct r600_mmio_counter bci; + struct r600_mmio_counter sc; + struct r600_mmio_counter pa; + struct r600_mmio_counter db; + struct r600_mmio_counter cp; + struct r600_mmio_counter cb; + + /* SRBM_STATUS2 */ + struct r600_mmio_counter sdma; + + /* CP_STAT */ + struct r600_mmio_counter pfp; + struct r600_mmio_counter meq; + struct r600_mmio_counter me; + struct r600_mmio_counter surf_sync; + struct r600_mmio_counter cp_dma; + struct r600_mmio_counter scratch_ram; + struct r600_mmio_counter ce; } named; unsigned array[0]; }; +struct r600_memory_object { + struct pipe_memory_object b; + struct pb_buffer *buf; + uint32_t stride; + uint32_t offset; +}; + struct r600_common_screen { struct pipe_screen b; struct radeon_winsys *ws; @@ -374,6 +394,10 @@ struct r600_common_screen { uint64_t debug_flags; bool has_cp_dma; bool has_streamout; + bool has_rbplus; /* if RB+ registers exist */ + bool rbplus_allowed; /* if RB+ is allowed */ + + struct disk_cache *disk_shader_cache; struct slab_parent_pool pool_transfers; @@ -383,7 +407,7 @@ struct r600_common_screen { /* Auxiliary context. Mainly used to initialize resources. * It must be locked prior to using and flushed before unlocking. */ struct pipe_context *aux_context; - pipe_mutex aux_context_lock; + mtx_t aux_context_lock; /* This must be in the screen, because UE4 uses one context for * compilation and another one for rendering. @@ -396,9 +420,9 @@ struct r600_common_screen { unsigned num_shader_cache_hits; /* GPU load thread. */ - pipe_mutex gpu_load_mutex; - pipe_thread gpu_load_thread; - union r600_grbm_counters grbm_counters; + mtx_t gpu_load_mutex; + thrd_t gpu_load_thread; + union r600_mmio_counters mmio_counters; volatile unsigned gpu_load_stop_thread; /* bool */ char renderer_string[100]; @@ -406,12 +430,14 @@ struct r600_common_screen { /* Performance counters. */ struct r600_perfcounters *perfcounters; - /* If pipe_screen wants to re-emit the framebuffer state of all - * contexts, it should atomically increment this. Each context will - * compare this with its own last known value of the counter before - * drawing and re-emit the framebuffer state accordingly. + /* If pipe_screen wants to recompute and re-emit the framebuffer, + * sampler, and image states of all contexts, it should atomically + * increment this. + * + * Each context will compare this with its own last known value of + * the counter before drawing and re-emit the states accordingly. */ - unsigned dirty_fb_counter; + unsigned dirty_tex_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing @@ -419,12 +445,6 @@ struct r600_common_screen { */ unsigned compressed_colortex_counter; - /* Atomically increment this counter when an existing texture's - * backing buffer or tile mode parameters have changed that requires - * recomputation of shader descriptors. - */ - unsigned dirty_tex_descriptor_counter; - struct { /* Context flags to set so that all writes from earlier jobs * in the CP are seen by L2 clients. @@ -479,7 +499,7 @@ struct r600_streamout { /* External state which comes from the vertex shader, * it must be set explicitly when binding a shader. */ - unsigned *stride_in_dw; + uint16_t *stride_in_dw; unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */ /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */ @@ -540,16 +560,17 @@ struct r600_common_context { struct r600_ring dma; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; + struct r600_resource *eop_bug_scratch; unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; - unsigned last_dirty_fb_counter; + unsigned last_dirty_tex_counter; unsigned last_compressed_colortex_counter; - unsigned last_dirty_tex_descriptor_counter; - struct u_upload_mgr *uploader; + struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; struct slab_child_pool pool_transfers; + struct slab_child_pool pool_transfers_unsync; /* for threaded_context */ /* Current unaccounted memory usage. */ uint64_t vram; @@ -573,11 +594,11 @@ struct r600_common_context { int num_perfect_occlusion_queries; struct list_head active_queries; unsigned num_cs_dw_queries_suspend; - /* Additional hardware info. */ - unsigned backend_mask; - unsigned max_db; /* for OQ */ /* Misc stats. */ unsigned num_draw_calls; + unsigned num_decompress_calls; + unsigned num_mrt_draw_calls; + unsigned num_prim_restart_calls; unsigned num_spill_draw_calls; unsigned num_compute_calls; unsigned num_spill_compute_calls; @@ -586,9 +607,11 @@ struct r600_common_context { unsigned num_vs_flushes; unsigned num_ps_flushes; unsigned num_cs_flushes; - unsigned num_fb_cache_flushes; + unsigned num_cb_cache_flushes; + unsigned num_db_cache_flushes; unsigned num_L2_invalidates; unsigned num_L2_writebacks; + unsigned num_resident_handles; uint64_t num_alloc_tex_transfer_bytes; unsigned last_tex_ps_draw_ratio; /* for query */ @@ -662,6 +685,12 @@ struct r600_common_context { * the buffer is bound, including all resource descriptors. */ void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf); + /* Update all resource bindings where the buffer is bound, including + * all resource descriptors. This is invalidate_buffer without + * the invalidation. */ + void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf, + uint64_t old_gpu_address); + /* Enable or disable occlusion queries. */ void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable); @@ -679,7 +708,7 @@ struct r600_common_context { enum ring_type ring); }; -/* r600_buffer.c */ +/* r600_buffer_common.c */ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, struct pb_buffer *buf, enum radeon_bo_usage usage); @@ -699,7 +728,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment); struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, - unsigned bind, + unsigned flags, unsigned usage, unsigned size, unsigned alignment); @@ -710,13 +739,17 @@ r600_buffer_from_user_memory(struct pipe_screen *screen, void r600_invalidate_resource(struct pipe_context *ctx, struct pipe_resource *resource); +void r600_replace_buffer_storage(struct pipe_context *ctx, + struct pipe_resource *dst, + struct pipe_resource *src); /* r600_common_pipe.c */ void r600_gfx_write_event_eop(struct r600_common_context *ctx, unsigned event, unsigned event_flags, unsigned data_sel, struct r600_resource *buf, uint64_t va, - uint32_t old_fence, uint32_t new_fence); + uint32_t old_fence, uint32_t new_fence, + unsigned query_type); unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); void r600_gfx_wait_fence(struct r600_common_context *ctx, uint64_t va, uint32_t ref, uint32_t mask); @@ -745,7 +778,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family); void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src); void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, - struct radeon_saved_cs *saved); + struct radeon_saved_cs *saved, bool get_buffer_list); void radeon_clear_saved_cs(struct radeon_saved_cs *saved); bool r600_check_device_reset(struct r600_common_context *rctx); @@ -763,7 +796,7 @@ void r600_init_screen_query_functions(struct r600_common_screen *rscreen); void r600_query_init(struct r600_common_context *rctx); void r600_suspend_queries(struct r600_common_context *ctx); void r600_resume_queries(struct r600_common_context *ctx); -void r600_query_init_backend_mask(struct r600_common_context *ctx); +void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen); /* r600_streamout.c */ void r600_streamout_buffers_dirty(struct r600_common_context *rctx); @@ -797,18 +830,23 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, bool r600_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture, struct r600_texture **staging); -void r600_print_texture_info(struct r600_texture *rtex, FILE *f); +void r600_print_texture_info(struct r600_common_screen *rscreen, + struct r600_texture *rtex, FILE *f); struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ); bool vi_dcc_formats_compatible(enum pipe_format format1, enum pipe_format format2); -void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx, +bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, + unsigned level, + enum pipe_format view_format); +void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx, struct pipe_resource *tex, unsigned level, enum pipe_format view_format); struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *templ, + unsigned width0, unsigned height0, unsigned width, unsigned height); unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap); void vi_separate_dcc_start_query(struct pipe_context *ctx, @@ -823,7 +861,7 @@ void vi_dcc_clear_level(struct r600_common_context *rctx, void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, - unsigned *buffers, unsigned *dirty_cbufs, + unsigned *buffers, ubyte *dirty_cbufs, const union pipe_color_union *color); bool r600_texture_disable_dcc(struct r600_common_context *rctx, struct r600_texture *rtex); @@ -959,6 +997,12 @@ r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler) (!stencil_sampler && tex->can_sample_z); } +static inline bool +vi_dcc_enabled(struct r600_texture *tex, unsigned level) +{ + return tex->dcc_offset && level < tex->surface.num_dcc_levels; +} + #define COMPUTE_DBG(rscreen, fmt, args...) \ do { \ if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \ @@ -974,4 +1018,9 @@ r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler) (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \ (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28)) +static inline int S_FIXED(float value, unsigned frac_bits) +{ + return value * (1 << frac_bits); +} + #endif