X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_pipe.h;h=d32feab52c2614bb99dbc22895855c48f6b7db41;hb=c4c17ab3ec1d67b0f2fd9816681378bdc8efe220;hp=bfcfc9151245ad15941c9a36a17759a4eca9de81;hpb=e2b9329f17eaf94c0cb6cc9f9bad907500fedeba;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index bfcfc915124..d32feab52c2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -39,7 +39,7 @@ #endif #define ATI_VENDOR_ID 0x1002 - +#define SI_PRIM_DISCARD_DEBUG 0 #define SI_NOT_QUERY 0xffffffff /* The base vertex and primitive restart can be any number, but we must pick @@ -47,6 +47,7 @@ * the number shouldn't be a commonly-used one. */ #define SI_BASE_VERTEX_UNKNOWN INT_MIN #define SI_RESTART_INDEX_UNKNOWN INT_MIN +#define SI_INSTANCE_COUNT_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 #define SI_MAX_POINT_SIZE 2048 #define SI_GS_PER_ES 128 @@ -71,7 +72,7 @@ /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ #define SI_CONTEXT_INV_GLOBAL_L2 (1 << 6) /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't - * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ + * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (1 << 7) /* Writeback & invalidate the L2 metadata cache. It can only be coupled with * a CB or DB flush. */ @@ -108,7 +109,21 @@ #define SI_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) #define SI_RESOURCE_FLAG_READ_ONLY (PIPE_RESOURCE_FLAG_DRV_PRIV << 5) #define SI_RESOURCE_FLAG_32BIT (PIPE_RESOURCE_FLAG_DRV_PRIV << 6) -#define SI_RESOURCE_FLAG_SO_FILLED_SIZE (PIPE_RESOURCE_FLAG_DRV_PRIV << 7) +#define SI_RESOURCE_FLAG_CLEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 7) +/* For const_uploader, upload data via GTT and copy to VRAM on context flush via SDMA. */ +#define SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA (PIPE_RESOURCE_FLAG_DRV_PRIV << 8) + +enum si_clear_code +{ + DCC_CLEAR_COLOR_0000 = 0x00000000, + DCC_CLEAR_COLOR_0001 = 0x40404040, + DCC_CLEAR_COLOR_1110 = 0x80808080, + DCC_CLEAR_COLOR_1111 = 0xC0C0C0C0, + DCC_CLEAR_COLOR_REG = 0x20202020, + DCC_UNCOMPRESSED = 0xFFFFFFFF, +}; + +#define SI_IMAGE_ACCESS_AS_BUFFER (1 << 7) /* Debug flags. */ enum { @@ -132,7 +147,6 @@ enum { /* Shader compiler options (with no effect on the shader cache): */ DBG_CHECK_IR, - DBG_NIR, DBG_MONOLITHIC_SHADERS, DBG_NO_OPT_VARIANT, @@ -151,6 +165,9 @@ enum { DBG_ZERO_VRAM, /* 3D engine options: */ + DBG_ALWAYS_PD, + DBG_PD, + DBG_NO_PD, DBG_SWITCH_ON_EOP, DBG_NO_OUT_OF_ORDER, DBG_NO_DPBB, @@ -195,13 +212,14 @@ enum si_coherency { }; struct si_compute; +struct si_shader_context; struct hash_table; struct u_suballocator; /* Only 32-bit buffer allocations are supported, gallium doesn't support more * at the moment. */ -struct r600_resource { +struct si_resource { struct threaded_resource b; /* Winsys objects. */ @@ -251,23 +269,33 @@ struct r600_resource { struct si_transfer { struct threaded_transfer b; - struct r600_resource *staging; + struct si_resource *staging; unsigned offset; }; struct si_texture { - struct r600_resource buffer; + struct si_resource buffer; struct radeon_surf surface; uint64_t size; struct si_texture *flushed_depth_texture; - /* Colorbuffer compression and fast clear. 
*/ + /* One texture allocation can contain these buffers: + * - image (pixel data) + * - FMASK buffer (MSAA compression) + * - CMASK buffer (MSAA compression and/or legacy fast color clear) + * - HTILE buffer (Z/S compression and fast Z/S clear) + * - DCC buffer (color compression and new fast color clear) + * - displayable DCC buffer (if the DCC buffer is not displayable) + * - DCC retile mapping buffer (if the DCC buffer is not displayable) + */ uint64_t fmask_offset; uint64_t cmask_offset; uint64_t cmask_base_address_reg; - struct r600_resource *cmask_buffer; + struct si_resource *cmask_buffer; uint64_t dcc_offset; /* 0 = disabled */ + uint64_t display_dcc_offset; + uint64_t dcc_retile_map_offset; unsigned cb_color_info; /* fast clear enable bit */ unsigned color_clear_value[2]; unsigned last_msaa_resolve_target_micro_mode; @@ -309,9 +337,9 @@ struct si_texture { * target == 2D and last_level == 0. If enabled, dcc_offset contains * the absolute GPUVM address, not the relative one. */ - struct r600_resource *dcc_separate_buffer; + struct si_resource *dcc_separate_buffer; /* When DCC is temporarily disabled, the separate buffer is here. */ - struct r600_resource *last_dcc_separate_buffer; + struct si_resource *last_dcc_separate_buffer; /* Estimate of how much this color buffer is written to in units of * full-screen draws: ps_invocations / (width * height) * Shader kills, late Z, and blending with trivial discards make it @@ -342,7 +370,7 @@ struct si_surface { unsigned cb_color_view; unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ + unsigned cb_dcc_control; /* GFX8 and later */ unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */ unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */ unsigned spi_shader_col_format_blend:8; /* blending without alpha. */ @@ -444,7 +472,7 @@ struct si_screen { bool has_out_of_order_rast; bool assume_no_z_fights; bool commutative_blend_add; - bool clear_db_cache_before_clear; + bool has_gfx9_scissor_bug; bool has_msaa_sample_loc_bug; bool has_ls_vgpr_init_bug; bool has_dcc_constant_encode; @@ -452,6 +480,11 @@ struct si_screen { bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; + struct { +#define OPT_BOOL(name, dflt, description) bool name:1; +#include "si_debug_options.h" + } options; + /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; bool record_llvm_ir; @@ -497,6 +530,7 @@ struct si_screen { * the counter before drawing and re-emit the states accordingly. 
*/ unsigned dirty_tex_counter; + unsigned dirty_buf_counter; /* Atomically increment this counter when an existing texture's * metadata is enabled or disabled in a way that requires changing @@ -573,7 +607,7 @@ struct si_sampler_view { #define SI_SAMPLER_STATE_MAGIC 0x34f1c35a struct si_sampler_state { -#ifdef DEBUG +#ifndef NDEBUG unsigned magic; #endif uint32_t val[4]; @@ -625,6 +659,7 @@ struct si_framebuffer { bool any_dst_linear; bool CB_has_shader_readable_metadata; bool DB_has_shader_readable_metadata; + bool all_DCC_pipe_aligned; }; enum si_quant_mode { @@ -642,16 +677,10 @@ struct si_signed_scissor { enum si_quant_mode quant_mode; }; -struct si_scissors { - unsigned dirty_mask; - struct pipe_scissor_state states[SI_MAX_VIEWPORTS]; -}; - struct si_viewports { - unsigned dirty_mask; - unsigned depth_range_dirty_mask; struct pipe_viewport_state states[SI_MAX_VIEWPORTS]; struct si_signed_scissor as_scissor[SI_MAX_VIEWPORTS]; + bool y_inverted; }; struct si_clip_state { @@ -663,7 +692,7 @@ struct si_streamout_target { struct pipe_stream_output_target b; /* The buffer where BUFFER_FILLED_SIZE is stored. */ - struct r600_resource *buf_filled_size; + struct si_resource *buf_filled_size; unsigned buf_filled_size_offset; bool buf_filled_size_valid; @@ -757,14 +786,24 @@ struct si_saved_cs { struct pipe_reference reference; struct si_context *ctx; struct radeon_saved_cs gfx; - struct r600_resource *trace_buf; + struct radeon_saved_cs compute; + struct si_resource *trace_buf; unsigned trace_id; unsigned gfx_last_dw; + unsigned compute_last_dw; bool flushed; int64_t time_flush; }; +struct si_sdma_upload { + struct si_resource *dst; + struct si_resource *src; + unsigned src_offset; + unsigned dst_offset; + unsigned size; +}; + struct si_context { struct pipe_context b; /* base class */ @@ -773,11 +812,11 @@ struct si_context { struct radeon_winsys *ws; struct radeon_winsys_ctx *ctx; - struct radeon_cmdbuf *gfx_cs; + struct radeon_cmdbuf *gfx_cs; /* compute IB if graphics is disabled */ struct radeon_cmdbuf *dma_cs; struct pipe_fence_handle *last_gfx_fence; struct pipe_fence_handle *last_sdma_fence; - struct r600_resource *eop_bug_scratch; + struct si_resource *eop_bug_scratch; struct u_upload_mgr *cached_gtt_allocator; struct threaded_context *tc; struct u_suballocator *allocator_zeroed_memory; @@ -799,22 +838,29 @@ struct si_context { void *vs_blit_texcoord; void *cs_clear_buffer; void *cs_copy_buffer; + void *cs_copy_image; + void *cs_copy_image_1d_array; + void *cs_clear_render_target; + void *cs_clear_render_target_1d_array; + void *cs_dcc_retile; struct si_screen *screen; struct pipe_debug_callback debug; struct ac_llvm_compiler compiler; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; - struct r600_resource *wait_mem_scratch; + /* Offset 0: EOP flush number; Offset 4: GDS prim restart counter */ + struct si_resource *wait_mem_scratch; unsigned wait_mem_number; uint16_t prefetch_L2_mask; + bool has_graphics; bool gfx_flush_in_progress:1; bool gfx_last_ib_is_busy:1; bool compute_is_busy:1; unsigned num_gfx_cs_flushes; unsigned initial_gfx_cs_size; - unsigned gpu_reset_counter; unsigned last_dirty_tex_counter; + unsigned last_dirty_buf_counter; unsigned last_compressed_colortex_counter; unsigned last_num_draw_calls; unsigned flags; /* flush flags */ @@ -822,6 +868,31 @@ struct si_context { uint64_t vram; uint64_t gtt; + /* Compute-based primitive discard. 
*/ + unsigned prim_discard_vertex_count_threshold; + struct pb_buffer *gds; + struct pb_buffer *gds_oa; + struct radeon_cmdbuf *prim_discard_compute_cs; + unsigned compute_gds_offset; + struct si_shader *compute_ib_last_shader; + uint32_t compute_rewind_va; + unsigned compute_num_prims_in_batch; + bool preserve_prim_restart_gds_at_flush; + /* index_ring is divided into 2 halves for doublebuffering. */ + struct si_resource *index_ring; + unsigned index_ring_base; /* offset of a per-IB portion */ + unsigned index_ring_offset; /* offset within a per-IB portion */ + unsigned index_ring_size_per_ib; /* max available size per IB */ + bool prim_discard_compute_ib_initialized; + /* For tracking the last execution barrier - it can be either + * a WRITE_DATA packet or a fence. */ + uint32_t *last_pkt3_write_data; + struct si_resource *barrier_buf; + unsigned barrier_buf_offset; + struct pipe_fence_handle *last_ib_barrier_fence; + struct si_resource *last_ib_barrier_buf; + unsigned last_ib_barrier_buf_offset; + /* Atoms (direct states). */ union si_state_atoms atoms; unsigned dirty_atoms; /* mask */ @@ -839,7 +910,7 @@ struct si_context { struct si_clip_state clip_state; struct si_shader_data shader_pointers; struct si_stencil_ref stencil_ref; - struct si_scissors scissors; + struct pipe_scissor_state scissors[SI_MAX_VIEWPORTS]; struct si_streamout streamout; struct si_viewports viewports; unsigned num_window_rectangles; @@ -858,6 +929,7 @@ struct si_context { struct si_shader_ctx_state vs_shader; struct si_shader_ctx_state tcs_shader; struct si_shader_ctx_state tes_shader; + struct si_shader_ctx_state cs_prim_discard_state; struct si_cs_shader_state cs_shader_state; /* shader information */ @@ -869,7 +941,7 @@ struct si_context { /* vertex buffer descriptors */ uint32_t *vb_descriptors_gpu_list; - struct r600_resource *vb_descriptors_buffer; + struct si_resource *vb_descriptors_buffer; unsigned vb_descriptors_offset; /* shader descriptors */ @@ -881,14 +953,17 @@ struct si_context { struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS]; struct si_samplers samplers[SI_NUM_SHADERS]; struct si_images images[SI_NUM_SHADERS]; + bool bo_list_add_all_resident_resources; + bool bo_list_add_all_gfx_resources; + bool bo_list_add_all_compute_resources; /* other shader resources */ - struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ + struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ - struct r600_resource *border_color_buffer; + struct si_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ unsigned border_color_count; unsigned num_vs_blit_sgprs; @@ -899,6 +974,7 @@ struct si_context { bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; + uint16_t vertex_buffer_unaligned; /* bitmask of not dword-aligned buffers */ /* MSAA config state. */ int ps_iter_samples; @@ -922,9 +998,11 @@ struct si_context { /* Emitted draw state. 
*/ bool gs_tri_strip_adj_fix:1; bool ls_vgpr_fix:1; + bool prim_discard_cs_instancing:1; int last_index_size; int last_base_vertex; int last_start_instance; + int last_instance_count; int last_drawid; int last_sh_base_reg; int last_primitive_restart_en; @@ -938,11 +1016,11 @@ struct si_context { enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */ /* Scratch buffer */ - struct r600_resource *scratch_buffer; + struct si_resource *scratch_buffer; unsigned scratch_waves; unsigned spi_tmpring_size; - struct r600_resource *compute_scratch_buffer; + struct si_resource *compute_scratch_buffer; /* Emitted derived tessellation state. */ /* Local shader (VS), or HS if LS-HS are merged. */ @@ -1028,12 +1106,16 @@ struct si_context { unsigned num_resident_handles; uint64_t num_alloc_tex_transfer_bytes; unsigned last_tex_ps_draw_ratio; /* for query */ - unsigned context_roll_counter; + unsigned compute_num_verts_accepted; + unsigned compute_num_verts_rejected; + unsigned compute_num_verts_ineligible; /* due to low vertex count */ + unsigned context_roll; /* Queries. */ /* Maintain the list of active queries for pausing between IBs. */ int num_occlusion_queries; int num_perfect_occlusion_queries; + int num_pipeline_stat_queries; struct list_head active_queries; unsigned num_cs_dw_queries_suspend; @@ -1043,6 +1125,12 @@ struct si_context { bool render_cond_invert; bool render_cond_force_off; /* for u_blitter */ + /* For uploading data via GTT and copy to VRAM on context flush via SDMA. */ + bool sdma_uploads_in_progress; + struct si_sdma_upload *sdma_uploads; + unsigned num_sdma_uploads; + unsigned max_sdma_uploads; + /* Statistics gathering for the DCC enablement heuristic. It can't be * in si_texture because si_texture can be shared by multiple * contexts. This is for back buffers only. 
We shouldn't get too many @@ -1110,17 +1198,17 @@ bool si_rings_is_buffer_referenced(struct si_context *sctx, struct pb_buffer *buf, enum radeon_bo_usage usage); void *si_buffer_map_sync_with_rings(struct si_context *sctx, - struct r600_resource *resource, + struct si_resource *resource, unsigned usage); void si_init_resource_fields(struct si_screen *sscreen, - struct r600_resource *res, + struct si_resource *res, uint64_t size, unsigned alignment); bool si_alloc_resource(struct si_screen *sscreen, - struct r600_resource *res); + struct si_resource *res); struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment); -struct r600_resource *si_aligned_buffer_create(struct pipe_screen *screen, +struct si_resource *si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags, unsigned usage, unsigned size, unsigned alignment); void si_replace_buffer_storage(struct pipe_context *ctx, @@ -1142,10 +1230,25 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher, enum si_cache_policy cache_policy); void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, uint32_t *clear_value, - uint32_t clear_value_size, enum si_coherency coher); + uint32_t clear_value_size, enum si_coherency coher, + bool force_cpdma); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size); +void si_compute_copy_image(struct si_context *sctx, + struct pipe_resource *dst, + unsigned dst_level, + struct pipe_resource *src, + unsigned src_level, + unsigned dstx, unsigned dsty, unsigned dstz, + const struct pipe_box *src_box); +void si_compute_clear_render_target(struct pipe_context *ctx, + struct pipe_surface *dstsurf, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + bool render_condition_enabled); +void si_retile_dcc(struct si_context *sctx, struct si_texture *tex); void si_init_compute_blit_functions(struct si_context *sctx); /* si_cp_dma.c */ @@ -1174,6 +1277,12 @@ void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf uint64_t offset, unsigned size); void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only); void si_test_gds(struct si_context *sctx); +void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, + unsigned offset, unsigned size, unsigned dst_sel, + unsigned engine, const void *data); +void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, + unsigned dst_sel, struct si_resource *dst, unsigned dst_offset, + unsigned src_sel, struct si_resource *src, unsigned src_offset); /* si_debug.c */ void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, @@ -1187,31 +1296,31 @@ void si_log_compute_state(struct si_context *sctx, struct u_log_context *log); void si_init_debug_functions(struct si_context *sctx); void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved, enum ring_type ring); -bool si_replace_shader(unsigned num, struct ac_shader_binary *binary); +bool si_replace_shader(unsigned num, struct si_shader_binary *binary); /* si_dma.c */ void si_init_dma_functions(struct si_context *sctx); /* si_dma_cs.c */ -void si_dma_emit_timestamp(struct si_context *sctx, struct r600_resource *dst, +void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, uint64_t offset); void 
si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned clear_value); void si_need_dma_space(struct si_context *ctx, unsigned num_dw, - struct r600_resource *dst, struct r600_resource *src); + struct si_resource *dst, struct si_resource *src); void si_flush_dma_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence); void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value); /* si_fence.c */ -void si_cp_release_mem(struct si_context *ctx, +void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel, - struct r600_resource *buf, uint64_t va, + struct si_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type); unsigned si_cp_write_fence_dwords(struct si_screen *screen); -void si_cp_wait_mem(struct si_context *ctx, +void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref, uint32_t mask, unsigned flags); void si_init_fence_functions(struct si_context *ctx); void si_init_screen_fence_functions(struct si_screen *screen); @@ -1226,16 +1335,43 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence); void si_begin_new_gfx_cs(struct si_context *ctx); void si_need_gfx_cs_space(struct si_context *ctx); +void si_unref_sdma_uploads(struct si_context *sctx); -/* r600_gpu_load.c */ +/* si_gpu_load.c */ void si_gpu_load_kill_thread(struct si_screen *sscreen); uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type); unsigned si_end_counter(struct si_screen *sscreen, unsigned type, uint64_t begin); /* si_compute.c */ +void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs); +unsigned si_get_compute_resource_limits(struct si_screen *sscreen, + unsigned waves_per_threadgroup, + unsigned max_waves_per_sh, + unsigned threadgroups_per_cu); void si_init_compute_functions(struct si_context *sctx); +/* si_compute_prim_discard.c */ +enum si_prim_discard_outcome { + SI_PRIM_DISCARD_ENABLED, + SI_PRIM_DISCARD_DISABLED, + SI_PRIM_DISCARD_DRAW_SPLIT, +}; + +void si_build_prim_discard_compute_shader(struct si_shader_context *ctx); +enum si_prim_discard_outcome +si_prepare_prim_discard_or_split_draw(struct si_context *sctx, + const struct pipe_draw_info *info, + bool primitive_restart); +void si_compute_signal_gfx(struct si_context *sctx); +void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx, + const struct pipe_draw_info *info, + unsigned index_size, + unsigned base_vertex, + uint64_t input_indexbuf_va, + unsigned input_indexbuf_max_elements); +void si_initialize_prim_discard_tunables(struct si_context *sctx); + /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); void si_destroy_perfcounters(struct si_screen *screen); @@ -1256,6 +1392,11 @@ void *si_create_fixed_func_tcs(struct si_context *sctx); void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy); +void *si_create_copy_image_compute_shader(struct pipe_context *ctx); +void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx); +void *si_clear_render_target_shader(struct pipe_context *ctx); +void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx); +void *si_create_dcc_retile_cs(struct pipe_context *ctx); void 
*si_create_query_result_cs(struct si_context *sctx); /* si_test_dma.c */ @@ -1327,13 +1468,13 @@ void si_init_context_texture_functions(struct si_context *sctx); * common helpers */ -static inline struct r600_resource *r600_resource(struct pipe_resource *r) +static inline struct si_resource *si_resource(struct pipe_resource *r) { - return (struct r600_resource*)r; + return (struct si_resource*)r; } static inline void -r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res) +si_resource_reference(struct si_resource **ptr, struct si_resource *res) { pipe_resource_reference((struct pipe_resource **)ptr, (struct pipe_resource *)res); @@ -1360,13 +1501,24 @@ si_tile_mode_index(struct si_texture *tex, unsigned level, bool stencil) return tex->surface.u.legacy.tiling_index[level]; } +static inline unsigned +si_get_minimum_num_gfx_cs_dwords(struct si_context *sctx) +{ + /* Don't count the needed CS space exactly and just use an upper bound. + * + * Also reserve space for stopping queries at the end of IB, because + * the number of active queries is unlimited in theory. + */ + return 2048 + sctx->num_cs_dw_queries_suspend; +} + static inline void si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r) { if (r) { /* Add memory usage for need_gfx_cs_space */ - sctx->vram += r600_resource(r)->vram_usage; - sctx->gtt += r600_resource(r)->gart_usage; + sctx->vram += si_resource(r)->vram_usage; + sctx->gtt += si_resource(r)->gart_usage; } } @@ -1374,6 +1526,7 @@ static inline void si_invalidate_draw_sh_constants(struct si_context *sctx) { sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN; + sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN; } static inline unsigned @@ -1469,7 +1622,7 @@ si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src) static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, - bool shaders_read_metadata) + bool shaders_read_metadata, bool dcc_pipe_aligned) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VMEM_L1; @@ -1479,12 +1632,13 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, * L2 metadata must be flushed if shaders read metadata. * (DCC, CMASK). 
*/ - if (num_samples >= 2) + if (num_samples >= 2 || + (shaders_read_metadata && !dcc_pipe_aligned)) sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1506,7 +1660,7 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1611,15 +1765,15 @@ radeon_cs_memory_below_limit(struct si_screen *screen, */ static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs, - struct r600_resource *rbo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority) { assert(usage); sctx->ws->cs_add_buffer( - cs, rbo->buf, + cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED), - rbo->domains, priority); + bo->domains, priority); } /** @@ -1641,18 +1795,23 @@ static inline void radeon_add_to_buffer_list(struct si_context *sctx, */ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx, - struct r600_resource *rbo, + struct si_resource *bo, enum radeon_bo_usage usage, enum radeon_bo_priority priority, bool check_mem) { if (check_mem && !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs, - sctx->vram + rbo->vram_usage, - sctx->gtt + rbo->gart_usage)) + sctx->vram + bo->vram_usage, + sctx->gtt + bo->gart_usage)) si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL); - radeon_add_to_buffer_list(sctx, sctx->gfx_cs, rbo, usage, priority); + radeon_add_to_buffer_list(sctx, sctx->gfx_cs, bo, usage, priority); +} + +static inline bool si_compute_prim_discard_enabled(struct si_context *sctx) +{ + return sctx->prim_discard_vertex_count_threshold != UINT_MAX; } #define PRINT_ERR(fmt, args...) \
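
/*
 * Usage sketch for the compute-based primitive discard hooks declared above.
 * This is not part of si_pipe.h or of the change itself; the surrounding
 * function and the call site are assumptions made for illustration.  Only
 * si_compute_prim_discard_enabled(), si_prepare_prim_discard_or_split_draw(),
 * si_initialize_prim_discard_tunables() and the si_prim_discard_outcome
 * values come from the header.
 */
static void example_draw_with_prim_discard(struct si_context *sctx,
                                           const struct pipe_draw_info *info)
{
	/* prim_discard_vertex_count_threshold == UINT_MAX means the feature
	 * was left disabled by si_initialize_prim_discard_tunables(). */
	if (!si_compute_prim_discard_enabled(sctx))
		return; /* regular vertex-shader draw path */

	switch (si_prepare_prim_discard_or_split_draw(sctx, info,
						      info->primitive_restart)) {
	case SI_PRIM_DISCARD_ENABLED:
		/* Indices are filtered by a compute shader on the parallel
		 * prim_discard_compute_cs IB; the gfx draw then consumes the
		 * rewritten index buffer from index_ring. */
		break;
	case SI_PRIM_DISCARD_DRAW_SPLIT:
		/* The draw exceeded the per-batch limit and was split;
		 * each part is resubmitted through the same path. */
		break;
	case SI_PRIM_DISCARD_DISABLED:
	default:
		/* Fall back to the regular vertex-shader draw. */
		break;
	}
}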
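
/*
 * Sketch of the X-macro pattern behind the new si_screen::options block
 * earlier in this header.  The OPT_BOOL entry and the #undef placement below
 * are placeholders, not the actual contents of si_debug_options.h; they only
 * illustrate how one option list can expand into bitfield members in one
 * place and into default assignments in another.
 *
 * hypothetical si_debug_options.h:
 *   OPT_BOOL(example_option, false, "Illustrative driver option")
 */

/* Expansion A: declare the bitfield members (as the options struct above). */
struct si_options_sketch {
#define OPT_BOOL(name, dflt, description) bool name:1;
#include "si_debug_options.h"
#undef OPT_BOOL
};

/* Expansion B: apply the defaults, e.g. during screen creation (illustrative). */
static void example_set_option_defaults(struct si_options_sketch *opts)
{
#define OPT_BOOL(name, dflt, description) opts->name = dflt;
#include "si_debug_options.h"
#undef OPT_BOOL
}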