X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeon%2Fr600_pipe_common.h;h=35168844fbf12d88e10327be31d03a6560adc267;hb=e5375ba028c8ca48b11b86068efb65c4e03153eb;hp=75dc5acc2471e86f3619b627b80eda5b9fcb9202;hpb=7db10093d3ee26b0a5050ae01750465da1a76970;p=mesa.git diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 75dc5acc247..35168844fbf 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -34,12 +34,15 @@ #include +#include "amd/common/ac_binary.h" + #include "radeon/radeon_winsys.h" +#include "util/disk_cache.h" #include "util/u_blitter.h" #include "util/list.h" #include "util/u_range.h" -#include "util/u_slab.h" +#include "util/slab.h" #include "util/u_suballoc.h" #include "util/u_transfer.h" @@ -49,6 +52,7 @@ #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) +#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) /* Pipeline & streamout query controls. */ @@ -78,6 +82,8 @@ #define DBG_NO_TGSI (1 << 13) #define DBG_NO_ASM (1 << 14) #define DBG_PREOPT_IR (1 << 15) +#define DBG_CHECK_IR (1 << 16) +#define DBG_NO_OPT_VARIANT (1 << 17) /* gaps */ #define DBG_TEST_DMA (1 << 20) /* Bits 21-31 are reserved for the r600g driver. */ @@ -100,10 +106,13 @@ #define DBG_MONOLITHIC_SHADERS (1llu << 47) #define DBG_NO_CE (1llu << 48) #define DBG_UNSAFE_MATH (1llu << 49) +#define DBG_NO_DCC_FB (1llu << 50) #define R600_MAP_BUFFER_ALIGNMENT 64 #define R600_MAX_VIEWPORTS 16 +#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024 + enum r600_coherency { R600_COHERENCY_NONE, /* no cache flushes needed */ R600_COHERENCY_SHADER, @@ -119,45 +128,10 @@ enum r600_coherency { struct r600_common_context; struct r600_perfcounters; struct tgsi_shader_info; +struct r600_qbo_state; -struct radeon_shader_reloc { - char name[32]; - uint64_t offset; -}; - -struct radeon_shader_binary { - /** Shader code */ - unsigned char *code; - unsigned code_size; - - /** Config/Context register state that accompanies this shader. - * This is a stream of dword pairs. First dword contains the - * register address, the second dword contains the value.*/ - unsigned char *config; - unsigned config_size; - - /** The number of bytes of config information for each global symbol. - */ - unsigned config_size_per_symbol; - - /** Constant data accessed by the shader. This will be uploaded - * into a constant buffer. */ - unsigned char *rodata; - unsigned rodata_size; - - /** List of symbol offsets for the shader */ - uint64_t *global_symbol_offsets; - unsigned global_symbol_count; - - struct radeon_shader_reloc *relocs; - unsigned reloc_count; - - /** Disassembled shader in a string. */ - char *disasm_string; -}; - -void radeon_shader_binary_init(struct radeon_shader_binary *b); -void radeon_shader_binary_clean(struct radeon_shader_binary *b); +void radeon_shader_binary_init(struct ac_shader_binary *b); +void radeon_shader_binary_clean(struct ac_shader_binary *b); /* Only 32-bit buffer allocations are supported, gallium doesn't support more * at the moment. @@ -168,9 +142,16 @@ struct r600_resource { /* Winsys objects. */ struct pb_buffer *buf; uint64_t gpu_address; + /* Memory usage if the buffer placement is optimal. */ + uint64_t vram_usage; + uint64_t gart_usage; - /* Resource state. */ + /* Resource properties. */ + uint64_t bo_size; + unsigned bo_alignment; enum radeon_bo_domain domains; + enum radeon_bo_flag flags; + unsigned bind_history; /* The buffer range which is initialized (with a write transfer, * streamout, DMA, or as a random access target). The rest of @@ -219,31 +200,23 @@ struct r600_cmask_info { uint64_t offset; uint64_t size; unsigned alignment; - unsigned pitch; - unsigned height; - unsigned xalign; - unsigned yalign; unsigned slice_tile_max; unsigned base_address_reg; }; -struct r600_htile_info { - unsigned pitch; - unsigned height; - unsigned xalign; - unsigned yalign; -}; - struct r600_texture { struct r600_resource resource; uint64_t size; unsigned num_level0_transfers; + enum pipe_format db_render_format; bool is_depth; + bool db_compatible; + bool can_sample_z; + bool can_sample_s; unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */ unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */ struct r600_texture *flushed_depth_texture; - bool is_flushing_texture; struct radeon_surf surface; /* Colorbuffer compression and fast clear. */ @@ -256,8 +229,8 @@ struct r600_texture { unsigned last_msaa_resolve_target_micro_mode; /* Depth buffer compression and fast clear. */ - struct r600_htile_info htile; struct r600_resource *htile_buffer; + bool tc_compatible_htile; bool depth_cleared; /* if it was cleared at least once */ float depth_clear_value; bool stencil_cleared; /* if it was cleared at least once */ @@ -265,6 +238,34 @@ struct r600_texture { bool non_disp_tiling; /* R600-Cayman only */ + /* Whether the texture is a displayable back buffer and needs DCC + * decompression, which is expensive. Therefore, it's enabled only + * if statistics suggest that it will pay off and it's allocated + * separately. It can't be bound as a sampler by apps. Limited to + * target == 2D and last_level == 0. If enabled, dcc_offset contains + * the absolute GPUVM address, not the relative one. + */ + struct r600_resource *dcc_separate_buffer; + /* When DCC is temporarily disabled, the separate buffer is here. */ + struct r600_resource *last_dcc_separate_buffer; + /* We need to track DCC dirtiness, because st/dri usually calls + * flush_resource twice per frame (not a bug) and we don't wanna + * decompress DCC twice. Also, the dirty tracking must be done even + * if DCC isn't used, because it's required by the DCC usage analysis + * for a possible future enablement. + */ + bool separate_dcc_dirty; + /* Statistics gathering for the DCC enablement heuristic. */ + bool dcc_gather_statistics; + /* Estimate of how much this color buffer is written to in units of + * full-screen draws: ps_invocations / (width * height) + * Shader kills, late Z, and blending with trivial discards make it + * inaccurate (we need to count CB updates, not PS invocations). + */ + unsigned ps_draw_ratio; + /* The number of clears since the last DCC usage analysis. */ + unsigned num_slow_clears; + /* Counter that should be non-zero if the texture is bound to a * framebuffer. Implemented in radeonsi only. */ @@ -273,7 +274,6 @@ struct r600_texture { struct r600_surface { struct pipe_surface base; - const struct radeon_surf_level *level_info; bool color_initialized; bool depth_initialized; @@ -282,6 +282,7 @@ struct r600_surface { bool alphatest_bypass; bool export_16bpc; bool color_is_int8; + bool color_is_int10; /* Color registers. */ unsigned cb_color_info; @@ -319,6 +320,47 @@ struct r600_surface { unsigned db_preload_control; /* EG and later */ }; +struct r600_mmio_counter { + unsigned busy; + unsigned idle; +}; + +union r600_mmio_counters { + struct { + /* For global GPU load including SDMA. */ + struct r600_mmio_counter gpu; + + /* GRBM_STATUS */ + struct r600_mmio_counter spi; + struct r600_mmio_counter gui; + struct r600_mmio_counter ta; + struct r600_mmio_counter gds; + struct r600_mmio_counter vgt; + struct r600_mmio_counter ia; + struct r600_mmio_counter sx; + struct r600_mmio_counter wd; + struct r600_mmio_counter bci; + struct r600_mmio_counter sc; + struct r600_mmio_counter pa; + struct r600_mmio_counter db; + struct r600_mmio_counter cp; + struct r600_mmio_counter cb; + + /* SRBM_STATUS2 */ + struct r600_mmio_counter sdma; + + /* CP_STAT */ + struct r600_mmio_counter pfp; + struct r600_mmio_counter meq; + struct r600_mmio_counter me; + struct r600_mmio_counter surf_sync; + struct r600_mmio_counter dma; + struct r600_mmio_counter scratch_ram; + struct r600_mmio_counter ce; + } named; + unsigned array[0]; +}; + struct r600_common_screen { struct pipe_screen b; struct radeon_winsys *ws; @@ -329,13 +371,17 @@ struct r600_common_screen { bool has_cp_dma; bool has_streamout; + struct disk_cache *disk_shader_cache; + + struct slab_parent_pool pool_transfers; + /* Texture filter settings. */ int force_aniso; /* -1 = disabled */ /* Auxiliary context. Mainly used to initialize resources. * It must be locked prior to using and flushed before unlocking. */ struct pipe_context *aux_context; - pipe_mutex aux_context_lock; + mtx_t aux_context_lock; /* This must be in the screen, because UE4 uses one context for * compilation and another one for rendering. @@ -345,25 +391,27 @@ struct r600_common_screen { * are loading shaders on demand. This is a monotonic counter. */ unsigned num_shaders_created; + unsigned num_shader_cache_hits; /* GPU load thread. */ - pipe_mutex gpu_load_mutex; - pipe_thread gpu_load_thread; - unsigned gpu_load_counter_busy; - unsigned gpu_load_counter_idle; + mtx_t gpu_load_mutex; + thrd_t gpu_load_thread; + union r600_mmio_counters mmio_counters; volatile unsigned gpu_load_stop_thread; /* bool */ - char renderer_string[64]; + char renderer_string[100]; /* Performance counters. */ struct r600_perfcounters *perfcounters; - /* If pipe_screen wants to re-emit the framebuffer state of all - * contexts, it should atomically increment this. Each context will - * compare this with its own last known value of the counter before - * drawing and re-emit the framebuffer state accordingly. + /* If pipe_screen wants to recompute and re-emit the framebuffer, + * sampler, and image states of all contexts, it should atomically + * increment this. + * + * Each context will compare this with its own last known value of + * the counter before drawing and re-emit the states accordingly. */ - unsigned dirty_fb_counter; + unsigned dirty_tex_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing @@ -371,11 +419,17 @@ struct r600_common_screen { */ unsigned compressed_colortex_counter; - /* Atomically increment this counter when an existing texture's - * backing buffer or tile mode parameters have changed that requires - * recomputation of shader descriptors. - */ - unsigned dirty_tex_descriptor_counter; + struct { + /* Context flags to set so that all writes from earlier jobs + * in the CP are seen by L2 clients. + */ + unsigned cp_to_L2; + + /* Context flags to set so that all writes from earlier + * compute jobs are seen by L2 clients. + */ + unsigned compute_to_L2; + } barrier_flags; void (*query_opaque_metadata)(struct r600_common_screen *rscreen, struct r600_texture *rtex, @@ -448,6 +502,7 @@ struct r600_scissors { struct r600_viewports { struct r600_atom atom; unsigned dirty_mask; + unsigned depth_range_dirty_mask; struct pipe_viewport_state states[R600_MAX_VIEWPORTS]; struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS]; }; @@ -477,16 +532,16 @@ struct r600_common_context { enum chip_class chip_class; struct r600_ring gfx; struct r600_ring dma; + struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; + unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; unsigned gpu_reset_counter; - unsigned last_dirty_fb_counter; + unsigned last_dirty_tex_counter; unsigned last_compressed_colortex_counter; - unsigned last_dirty_tex_descriptor_counter; - struct u_upload_mgr *uploader; struct u_suballocator *allocator_zeroed_memory; - struct util_slab_mempool pool_transfers; + struct slab_child_pool pool_transfers; /* Current unaccounted memory usage. */ uint64_t vram; @@ -497,6 +552,7 @@ struct r600_common_context { struct r600_scissors scissors; struct r600_viewports viewports; bool scissor_enabled; + bool clip_halfz; bool vs_writes_viewport_index; bool vs_disables_clipping_viewport; @@ -509,16 +565,21 @@ struct r600_common_context { int num_perfect_occlusion_queries; struct list_head active_queries; unsigned num_cs_dw_queries_suspend; - /* Additional hardware info. */ - unsigned backend_mask; - unsigned max_db; /* for OQ */ /* Misc stats. */ unsigned num_draw_calls; unsigned num_spill_draw_calls; unsigned num_compute_calls; unsigned num_spill_compute_calls; unsigned num_dma_calls; + unsigned num_cp_dma_calls; + unsigned num_vs_flushes; + unsigned num_ps_flushes; + unsigned num_cs_flushes; + unsigned num_fb_cache_flushes; + unsigned num_L2_invalidates; + unsigned num_L2_writebacks; uint64_t num_alloc_tex_transfer_bytes; + unsigned last_tex_ps_draw_ratio; /* for query */ /* Render condition. */ struct r600_atom render_cond_atom; @@ -536,12 +597,29 @@ struct r600_common_context { float sample_locations_8x[8][2]; float sample_locations_16x[16][2]; - /* The list of all texture buffer objects in this context. - * This list is walked when a buffer is invalidated/reallocated and - * the GPU addresses are updated. */ - struct list_head texture_buffers; + /* Statistics gathering for the DCC enablement heuristic. It can't be + * in r600_texture because r600_texture can be shared by multiple + * contexts. This is for back buffers only. We shouldn't get too many + * of those. + * + * X11 DRI3 rotates among a finite set of back buffers. They should + * all fit in this array. If they don't, separate DCC might never be + * enabled by DCC stat gathering. + */ + struct { + struct r600_texture *tex; + /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */ + struct pipe_query *ps_stats[3]; + /* If all slots are used and another slot is needed, + * the least recently used slot is evicted based on this. */ + int64_t last_use_timestamp; + bool query_active; + } dcc_stats[5]; struct pipe_debug_callback debug; + struct pipe_device_reset_callback device_reset_callback; + + void *query_result_shader; /* Copy one resource to another using async DMA. */ void (*dma_copy)(struct pipe_context *ctx, @@ -552,6 +630,9 @@ struct r600_common_context { unsigned src_level, const struct pipe_box *src_box); + void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, + uint64_t offset, uint64_t size, unsigned value); + void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value, enum r600_coherency coher); @@ -573,6 +654,8 @@ struct r600_common_context { /* Enable or disable occlusion queries. */ void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable); + void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st); + /* This ensures there is enough space in the command stream. */ void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw, bool include_draw_vbo); @@ -592,14 +675,20 @@ bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, struct r600_resource *resource, unsigned usage); -bool r600_init_resource(struct r600_common_screen *rscreen, - struct r600_resource *res, - uint64_t size, unsigned alignment); +void r600_buffer_subdata(struct pipe_context *ctx, + struct pipe_resource *buffer, + unsigned usage, unsigned offset, + unsigned size, const void *data); +void r600_init_resource_fields(struct r600_common_screen *rscreen, + struct r600_resource *res, + uint64_t size, unsigned alignment); +bool r600_alloc_resource(struct r600_common_screen *rscreen, + struct r600_resource *res); struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment); struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, - unsigned bind, + unsigned flags, unsigned usage, unsigned size, unsigned alignment); @@ -612,6 +701,14 @@ r600_invalidate_resource(struct pipe_context *ctx, struct pipe_resource *resource); /* r600_common_pipe.c */ +void r600_gfx_write_event_eop(struct r600_common_context *ctx, + unsigned event, unsigned event_flags, + unsigned data_sel, + struct r600_resource *buf, uint64_t va, + uint32_t old_fence, uint32_t new_fence); +unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen); +void r600_gfx_wait_fence(struct r600_common_context *ctx, + uint64_t va, uint32_t ref, uint32_t mask); void r600_draw_rectangle(struct blitter_context *blitter, int x1, int y1, int x2, int y2, float depth, enum blitter_attrib_type type, @@ -622,28 +719,30 @@ void r600_destroy_common_screen(struct r600_common_screen *rscreen); void r600_preflush_suspend_features(struct r600_common_context *ctx); void r600_postflush_resume_features(struct r600_common_context *ctx); bool r600_common_context_init(struct r600_common_context *rctx, - struct r600_common_screen *rscreen); + struct r600_common_screen *rscreen, + unsigned context_flags); void r600_common_context_cleanup(struct r600_common_context *rctx); -void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r); bool r600_can_dump_shader(struct r600_common_screen *rscreen, unsigned processor); +bool r600_extra_shader_checks(struct r600_common_screen *rscreen, + unsigned processor); void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - uint64_t offset, uint64_t size, unsigned value, - enum r600_coherency coher); + uint64_t offset, uint64_t size, unsigned value); struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ); const char *r600_get_llvm_processor_name(enum radeon_family family); void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, struct r600_resource *dst, struct r600_resource *src); -void r600_dma_emit_wait_idle(struct r600_common_context *rctx); void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, struct radeon_saved_cs *saved); void radeon_clear_saved_cs(struct radeon_saved_cs *saved); +bool r600_check_device_reset(struct r600_common_context *rctx); /* r600_gpu_load.c */ void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen); -uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen); -unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin); +uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type); +unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type, + uint64_t begin); /* r600_perfcounters.c */ void r600_perfcounters_destroy(struct r600_common_screen *rscreen); @@ -653,7 +752,7 @@ void r600_init_screen_query_functions(struct r600_common_screen *rscreen); void r600_query_init(struct r600_common_context *rctx); void r600_suspend_queries(struct r600_common_context *ctx); void r600_resume_queries(struct r600_common_context *ctx); -void r600_query_init_backend_mask(struct r600_common_context *ctx); +void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen); /* r600_streamout.c */ void r600_streamout_buffers_dirty(struct r600_common_context *rctx); @@ -690,11 +789,23 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx, void r600_print_texture_info(struct r600_texture *rtex, FILE *f); struct pipe_resource *r600_texture_create(struct pipe_screen *screen, const struct pipe_resource *templ); +bool vi_dcc_formats_compatible(enum pipe_format format1, + enum pipe_format format2); +void vi_dcc_disable_if_incompatible_format(struct r600_common_context *rctx, + struct pipe_resource *tex, + unsigned level, + enum pipe_format view_format); struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe, struct pipe_resource *texture, const struct pipe_surface *templ, unsigned width, unsigned height); unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap); +void vi_separate_dcc_start_query(struct pipe_context *ctx, + struct r600_texture *tex); +void vi_separate_dcc_stop_query(struct pipe_context *ctx, + struct r600_texture *tex); +void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx, + struct r600_texture *tex); void vi_dcc_clear_level(struct r600_common_context *rctx, struct r600_texture *rtex, unsigned level, unsigned clear_value); @@ -703,7 +814,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct r600_atom *fb_state, unsigned *buffers, unsigned *dirty_cbufs, const union pipe_color_union *color); -bool r600_texture_disable_dcc(struct r600_common_screen *rscreen, +bool r600_texture_disable_dcc(struct r600_common_context *rctx, struct r600_texture *rtex); void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); void r600_init_context_texture_functions(struct r600_common_context *rctx); @@ -711,7 +822,8 @@ void r600_init_context_texture_functions(struct r600_common_context *rctx); /* r600_viewport.c */ void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx, struct pipe_scissor_state *scissor); -void r600_set_scissor_enable(struct r600_common_context *rctx, bool enable); +void r600_viewport_set_rast_deps(struct r600_common_context *rctx, + bool scissor_enable, bool clip_halfz); void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx, struct tgsi_shader_info *info); void r600_init_viewport_functions(struct r600_common_context *rctx); @@ -744,6 +856,25 @@ r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) (struct pipe_resource *)res); } +static inline void +r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res) +{ + pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b); +} + +static inline void +r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r) +{ + struct r600_common_context *rctx = (struct r600_common_context *)ctx; + struct r600_resource *res = (struct r600_resource *)r; + + if (res) { + /* Add memory usage for need_gfx_cs_space */ + rctx->vram += res->vram_usage; + rctx->gtt += res->gart_usage; + } +} + static inline bool r600_get_strmout_en(struct r600_common_context *rctx) { return rctx->streamout.streamout_enabled || @@ -810,6 +941,13 @@ r600_get_sampler_view_priority(struct r600_resource *res) return RADEON_PRIO_SAMPLER_TEXTURE; } +static inline bool +r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler) +{ + return (stencil_sampler && tex->can_sample_s) || + (!stencil_sampler && tex->can_sample_z); +} + #define COMPUTE_DBG(rscreen, fmt, args...) \ do { \ if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \