X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_pipe.h;h=d32feab52c2614bb99dbc22895855c48f6b7db41;hb=c4c17ab3ec1d67b0f2fd9816681378bdc8efe220;hp=5d43a594759ee2c778298dba29802b718f05fefd;hpb=951d60f8cdc886adff09201ff65002e3ee1a4c61;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 5d43a594759..d32feab52c2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -39,7 +39,7 @@ #endif #define ATI_VENDOR_ID 0x1002 - +#define SI_PRIM_DISCARD_DEBUG 0 #define SI_NOT_QUERY 0xffffffff /* The base vertex and primitive restart can be any number, but we must pick @@ -72,7 +72,7 @@ /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ #define SI_CONTEXT_INV_GLOBAL_L2 (1 << 6) /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't - * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ + * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (1 << 7) /* Writeback & invalidate the L2 metadata cache. It can only be coupled with * a CB or DB flush. */ @@ -147,7 +147,6 @@ enum { /* Shader compiler options (with no effect on the shader cache): */ DBG_CHECK_IR, - DBG_NIR, DBG_MONOLITHIC_SHADERS, DBG_NO_OPT_VARIANT, @@ -166,6 +165,9 @@ enum { DBG_ZERO_VRAM, /* 3D engine options: */ + DBG_ALWAYS_PD, + DBG_PD, + DBG_NO_PD, DBG_SWITCH_ON_EOP, DBG_NO_OUT_OF_ORDER, DBG_NO_DPBB, @@ -210,6 +212,7 @@ enum si_coherency { }; struct si_compute; +struct si_shader_context; struct hash_table; struct u_suballocator; @@ -367,7 +370,7 @@ struct si_surface { unsigned cb_color_view; unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ + unsigned cb_dcc_control; /* GFX8 and later */ unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */ unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */ unsigned spi_shader_col_format_blend:8; /* blending without alpha. */ @@ -469,7 +472,7 @@ struct si_screen { bool has_out_of_order_rast; bool assume_no_z_fights; bool commutative_blend_add; - bool clear_db_cache_before_clear; + bool has_gfx9_scissor_bug; bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; bool has_dcc_constant_encode; @@ -477,6 +480,11 @@ struct si_screen { bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; + struct { +#define OPT_BOOL(name, dflt, description) bool name:1; +#include "si_debug_options.h" + } options; + /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; @@ -522,6 +530,7 @@ struct si_screen { * the counter before drawing and re-emit the states accordingly. */ unsigned dirty_tex_counter; + unsigned dirty_buf_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing @@ -598,7 +607,7 @@ struct si_sampler_view { #define SI_SAMPLER_STATE_MAGIC 0x34f1c35a struct si_sampler_state { -#ifdef DEBUG +#ifndef NDEBUG unsigned magic; #endif uint32_t val[4]; @@ -668,16 +677,10 @@ struct si_signed_scissor { enum si_quant_mode quant_mode; }; -struct si_scissors { - unsigned dirty_mask; - struct pipe_scissor_state states[SI_MAX_VIEWPORTS]; -}; - struct si_viewports { - unsigned dirty_mask; - unsigned depth_range_dirty_mask; struct pipe_viewport_state states[SI_MAX_VIEWPORTS]; struct si_signed_scissor as_scissor[SI_MAX_VIEWPORTS]; + bool y_inverted; }; struct si_clip_state { @@ -783,10 +786,12 @@ struct si_saved_cs { struct pipe_reference reference; struct si_context *ctx; struct radeon_saved_cs gfx; + struct radeon_saved_cs compute; struct si_resource *trace_buf; unsigned trace_id; unsigned gfx_last_dw; + unsigned compute_last_dw; bool flushed; int64_t time_flush; }; @@ -842,6 +847,7 @@ struct si_context { struct pipe_debug_callback debug; struct ac_llvm_compiler compiler; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; + /* Offset 0: EOP flush number; Offset 4: GDS prim restart counter */ struct si_resource *wait_mem_scratch; unsigned wait_mem_number; uint16_t prefetch_L2_mask; @@ -853,8 +859,8 @@ struct si_context { unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; - unsigned gpu_reset_counter; unsigned last_dirty_tex_counter; + unsigned last_dirty_buf_counter; unsigned last_compressed_colortex_counter; unsigned last_num_draw_calls; unsigned flags; /* flush flags */ @@ -862,6 +868,31 @@ struct si_context { uint64_t vram; uint64_t gtt; + /* Compute-based primitive discard. */ + unsigned prim_discard_vertex_count_threshold; + struct pb_buffer *gds; + struct pb_buffer *gds_oa; + struct radeon_cmdbuf *prim_discard_compute_cs; + unsigned compute_gds_offset; + struct si_shader *compute_ib_last_shader; + uint32_t compute_rewind_va; + unsigned compute_num_prims_in_batch; + bool preserve_prim_restart_gds_at_flush; + /* index_ring is divided into 2 halves for doublebuffering. */ + struct si_resource *index_ring; + unsigned index_ring_base; /* offset of a per-IB portion */ + unsigned index_ring_offset; /* offset within a per-IB portion */ + unsigned index_ring_size_per_ib; /* max available size per IB */ + bool prim_discard_compute_ib_initialized; + /* For tracking the last execution barrier - it can be either + * a WRITE_DATA packet or a fence. */ + uint32_t *last_pkt3_write_data; + struct si_resource *barrier_buf; + unsigned barrier_buf_offset; + struct pipe_fence_handle *last_ib_barrier_fence; + struct si_resource *last_ib_barrier_buf; + unsigned last_ib_barrier_buf_offset; + /* Atoms (direct states). */ union si_state_atoms atoms; unsigned dirty_atoms; /* mask */ @@ -879,7 +910,7 @@ struct si_context { struct si_clip_state clip_state; struct si_shader_data shader_pointers; struct si_stencil_ref stencil_ref; - struct si_scissors scissors; + struct pipe_scissor_state scissors[SI_MAX_VIEWPORTS]; struct si_streamout streamout; struct si_viewports viewports; unsigned num_window_rectangles; @@ -898,6 +929,7 @@ struct si_context { struct si_shader_ctx_state vs_shader; struct si_shader_ctx_state tcs_shader; struct si_shader_ctx_state tes_shader; + struct si_shader_ctx_state cs_prim_discard_state; struct si_cs_shader_state cs_shader_state; /* shader information */ @@ -926,7 +958,7 @@ struct si_context { bool bo_list_add_all_compute_resources; /* other shader resources */ - struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ + struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; @@ -942,6 +974,7 @@ struct si_context { bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; + uint16_t vertex_buffer_unaligned; /* bitmask of not dword-aligned buffers */ /* MSAA config state. */ int ps_iter_samples; @@ -965,6 +998,7 @@ struct si_context { /* Emitted draw state. */ bool gs_tri_strip_adj_fix:1; bool ls_vgpr_fix:1; + bool prim_discard_cs_instancing:1; int last_index_size; int last_base_vertex; int last_start_instance; @@ -1072,12 +1106,16 @@ struct si_context { unsigned num_resident_handles; uint64_t num_alloc_tex_transfer_bytes; unsigned last_tex_ps_draw_ratio; /* for query */ - unsigned context_roll_counter; + unsigned compute_num_verts_accepted; + unsigned compute_num_verts_rejected; + unsigned compute_num_verts_ineligible; /* due to low vertex count */ + unsigned context_roll; /* Queries. */ /* Maintain the list of active queries for pausing between IBs. */ int num_occlusion_queries; int num_perfect_occlusion_queries; + int num_pipeline_stat_queries; struct list_head active_queries; unsigned num_cs_dw_queries_suspend; @@ -1242,7 +1280,7 @@ void si_test_gds(struct si_context *sctx); void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned offset, unsigned size, unsigned dst_sel, unsigned engine, const void *data); -void si_cp_copy_data(struct si_context *sctx, +void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned dst_sel, struct si_resource *dst, unsigned dst_offset, unsigned src_sel, struct si_resource *src, unsigned src_offset); @@ -1258,7 +1296,7 @@ void si_log_compute_state(struct si_context *sctx, struct u_log_context *log); void si_init_debug_functions(struct si_context *sctx); void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved, enum ring_type ring); -bool si_replace_shader(unsigned num, struct ac_shader_binary *binary); +bool si_replace_shader(unsigned num, struct si_shader_binary *binary); /* si_dma.c */ void si_init_dma_functions(struct si_context *sctx); @@ -1276,7 +1314,7 @@ void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst uint64_t offset, uint64_t size, unsigned value); /* si_fence.c */ -void si_cp_release_mem(struct si_context *ctx, +void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel, struct si_resource *buf, uint64_t va, @@ -1306,8 +1344,34 @@ unsigned si_end_counter(struct si_screen *sscreen, unsigned type, uint64_t begin); /* si_compute.c */ +void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs); +unsigned si_get_compute_resource_limits(struct si_screen *sscreen, + unsigned waves_per_threadgroup, + unsigned max_waves_per_sh, + unsigned threadgroups_per_cu); void si_init_compute_functions(struct si_context *sctx); +/* si_compute_prim_discard.c */ +enum si_prim_discard_outcome { + SI_PRIM_DISCARD_ENABLED, + SI_PRIM_DISCARD_DISABLED, + SI_PRIM_DISCARD_DRAW_SPLIT, +}; + +void si_build_prim_discard_compute_shader(struct si_shader_context *ctx); +enum si_prim_discard_outcome +si_prepare_prim_discard_or_split_draw(struct si_context *sctx, + const struct pipe_draw_info *info, + bool primitive_restart); +void si_compute_signal_gfx(struct si_context *sctx); +void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx, + const struct pipe_draw_info *info, + unsigned index_size, + unsigned base_vertex, + uint64_t input_indexbuf_va, + unsigned input_indexbuf_max_elements); +void si_initialize_prim_discard_tunables(struct si_context *sctx); + /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); void si_destroy_perfcounters(struct si_screen *screen); @@ -1574,7 +1638,7 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1596,7 +1660,7 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1745,6 +1809,11 @@ radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, radeon_add_to_buffer_list(sctx, sctx->gfx_cs, bo, usage, priority); } +static inline bool si_compute_prim_discard_enabled(struct si_context *sctx) +{ + return sctx->prim_discard_vertex_count_threshold != UINT_MAX; +} + #define PRINT_ERR(fmt, args...) \ fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)