X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_pipe.h;h=8d7703b0e4b0f150d4dd1badd202b20c84724374;hb=dd79aa4ad30df205076be25f6bbf42768abea20a;hp=93f1d653e13b02b2f9a466411fd074f49dc14613;hpb=28c7fbbe0fbd5c06db523140929f572e9c6e9dbe;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 93f1d653e13..8d7703b0e4b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -29,6 +29,7 @@ #include "si_shader.h" #include "util/u_dynarray.h" +#include "util/u_idalloc.h" #ifdef PIPE_ARCH_BIG_ENDIAN #define SI_BIG_ENDIAN 1 @@ -57,9 +58,12 @@ /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4) -/* gaps */ +/* Writeback & invalidate the L2 metadata cache. It can only be coupled with + * a CB or DB flush. */ +#define SI_CONTEXT_INV_L2_METADATA (R600_CONTEXT_PRIVATE_FLAG << 5) /* Framebuffer caches. */ -#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 7) +#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6) +#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 7) #define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 8) /* Engine synchronization. */ #define SI_CONTEXT_VS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9) @@ -68,7 +72,16 @@ #define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 12) #define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 13) +#define SI_PREFETCH_VBO_DESCRIPTORS (1 << 0) +#define SI_PREFETCH_LS (1 << 1) +#define SI_PREFETCH_HS (1 << 2) +#define SI_PREFETCH_ES (1 << 3) +#define SI_PREFETCH_GS (1 << 4) +#define SI_PREFETCH_VS (1 << 5) +#define SI_PREFETCH_PS (1 << 6) + #define SI_MAX_BORDER_COLORS 4096 +#define SI_MAX_VIEWPORTS 16 #define SIX_BITS 0x3F struct si_compute; @@ -79,10 +92,16 @@ struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; unsigned tess_offchip_block_dw_size; + bool has_clear_state; bool has_distributed_tess; bool has_draw_indirect_multi; - bool has_ds_bpermute; + bool has_out_of_order_rast; + bool assume_no_z_fights; + bool commutative_blend_add; + bool clear_db_cache_before_clear; bool has_msaa_sample_loc_bug; + bool dpbb_allowed; + bool dfsm_allowed; bool llvm_has_working_vgpr_indexing; /* Whether shaders are monolithic (1-part) or separate (3-part). */ @@ -140,6 +159,7 @@ struct si_sampler_view { ubyte base_level; ubyte block_width; bool is_stencil_sampler; + bool is_integer; bool dcc_incompatible; }; @@ -150,6 +170,8 @@ struct si_sampler_state { unsigned magic; #endif uint32_t val[4]; + uint32_t integer_val[4]; + uint32_t upgraded_depth_val[4]; }; struct si_cs_shader_state { @@ -160,13 +182,17 @@ struct si_cs_shader_state { bool uses_scratch; }; -struct si_textures_info { - struct si_sampler_views views; +struct si_samplers { + struct pipe_sampler_view *views[SI_NUM_SAMPLERS]; + struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS]; + + /* The i-th bit is set if that element is enabled (non-NULL resource). */ + unsigned enabled_mask; uint32_t needs_depth_decompress_mask; uint32_t needs_color_decompress_mask; }; -struct si_images_info { +struct si_images { struct pipe_image_view views[SI_NUM_IMAGES]; uint32_t needs_color_decompress_mask; unsigned enabled_mask; @@ -188,7 +214,29 @@ struct si_framebuffer { ubyte dirty_cbufs; bool dirty_zsbuf; bool any_dst_linear; - bool do_update_surf_dirtiness; + bool CB_has_shader_readable_metadata; + bool DB_has_shader_readable_metadata; +}; + +struct si_signed_scissor { + int minx; + int miny; + int maxx; + int maxy; +}; + +struct si_scissors { + struct r600_atom atom; + unsigned dirty_mask; + struct pipe_scissor_state states[SI_MAX_VIEWPORTS]; +}; + +struct si_viewports { + struct r600_atom atom; + unsigned dirty_mask; + unsigned depth_range_dirty_mask; + struct pipe_viewport_state states[SI_MAX_VIEWPORTS]; + struct si_signed_scissor as_scissor[SI_MAX_VIEWPORTS]; }; struct si_clip_state { @@ -207,6 +255,43 @@ struct si_sample_mask { uint16_t sample_mask; }; +struct si_streamout_target { + struct pipe_stream_output_target b; + + /* The buffer where BUFFER_FILLED_SIZE is stored. */ + struct r600_resource *buf_filled_size; + unsigned buf_filled_size_offset; + bool buf_filled_size_valid; + + unsigned stride_in_dw; +}; + +struct si_streamout { + struct r600_atom begin_atom; + bool begin_emitted; + + unsigned enabled_mask; + unsigned num_targets; + struct si_streamout_target *targets[PIPE_MAX_SO_BUFFERS]; + + unsigned append_bitmask; + bool suspended; + + /* External state which comes from the vertex shader, + * it must be set explicitly when binding a shader. */ + uint16_t *stride_in_dw; + unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */ + + /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */ + unsigned hw_enabled_mask; + + /* The state of VGT_STRMOUT_(CONFIG|EN). */ + struct r600_atom enable_atom; + bool streamout_enabled; + bool prims_gen_query_enabled; + int num_prims_gen_queries; +}; + /* A shader state consists of the shader selector, which is a constant state * object shared by multiple contexts and shouldn't be modified, and * the current shader variant selected for this context. @@ -238,28 +323,32 @@ union si_vgt_param_key { uint32_t index; }; -struct si_bindless_descriptor -{ - struct pb_slab_entry entry; - struct r600_resource *buffer; - unsigned offset; - uint32_t desc_list[16]; - bool dirty; -}; - struct si_texture_handle { - struct si_bindless_descriptor *desc; + unsigned desc_slot; + bool desc_dirty; struct pipe_sampler_view *view; struct si_sampler_state sstate; }; struct si_image_handle { - struct si_bindless_descriptor *desc; + unsigned desc_slot; + bool desc_dirty; struct pipe_image_view view; }; +struct si_saved_cs { + struct pipe_reference reference; + struct si_context *ctx; + struct radeon_saved_cs gfx; + struct r600_resource *trace_buf; + unsigned trace_id; + + unsigned gfx_last_dw; + bool flushed; +}; + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -268,19 +357,17 @@ struct si_context { void *custom_blend_fmask_decompress; void *custom_blend_eliminate_fastclear; void *custom_blend_dcc_decompress; + void *vs_blit_pos; + void *vs_blit_pos_layered; + void *vs_blit_color; + void *vs_blit_color_layered; + void *vs_blit_texcoord; struct si_screen *screen; LLVMTargetMachineRef tm; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; struct r600_resource *wait_mem_scratch; unsigned wait_mem_number; - - struct radeon_winsys_cs *ce_ib; - struct radeon_winsys_cs *ce_preamble_ib; - struct r600_resource *ce_ram_saved_buffer; - struct u_suballocator *ce_suballocator; - unsigned ce_ram_saved_offset; - uint16_t total_ce_ram_allocated; - bool ce_need_synchronization:1; + uint16_t prefetch_L2_mask; bool gfx_flush_in_progress:1; bool compute_is_busy:1; @@ -294,10 +381,10 @@ struct si_context { union si_state emitted; /* Atom declarations. */ - struct r600_atom prefetch_L2; struct si_framebuffer framebuffer; struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; + struct r600_atom dpbb_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; struct r600_atom cb_render_state; @@ -305,9 +392,12 @@ struct si_context { struct si_blend_color blend_color; struct r600_atom clip_regs; struct si_clip_state clip_state; - struct si_shader_data shader_userdata; + struct si_shader_data shader_pointers; struct si_stencil_ref stencil_ref; struct r600_atom spi_map; + struct si_scissors scissors; + struct si_streamout streamout; + struct si_viewports viewports; /* Precomputed states. */ struct si_pm4_state *init_config; @@ -337,8 +427,8 @@ struct si_context { unsigned shader_needs_decompress_mask; struct si_buffer_resources rw_buffers; struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS]; - struct si_textures_info samplers[SI_NUM_SHADERS]; - struct si_images_info images[SI_NUM_SHADERS]; + struct si_samplers samplers[SI_NUM_SHADERS]; + struct si_images images[SI_NUM_SHADERS]; /* other shader resources */ struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ @@ -350,6 +440,8 @@ struct si_context { struct r600_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ unsigned border_color_count; + unsigned num_vs_blit_sgprs; + uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; /* Vertex and index buffers. */ bool vertex_buffers_dirty; @@ -376,6 +468,7 @@ struct si_context { /* Emitted draw state. */ bool gs_tri_strip_adj_fix:1; + bool ls_vgpr_fix:1; int last_index_size; int last_base_vertex; int last_start_instance; @@ -411,10 +504,7 @@ struct si_context { /* Debug state. */ bool is_debug; - struct radeon_saved_cs last_gfx; - struct r600_resource *last_trace_buf; - struct r600_resource *trace_buf; - unsigned trace_id; + struct si_saved_cs *current_saved_cs; uint64_t dmesg_timestamp; unsigned apitrace_call_number; @@ -422,16 +512,20 @@ struct si_context { bool need_check_render_feedback; bool decompression_enabled; + bool vs_writes_viewport_index; + bool vs_disables_clipping_viewport; + /* Precomputed IA_MULTI_VGT_PARAM */ union si_vgt_param_key ia_multi_vgt_param_key; unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES]; - /* Slab allocator for bindless descriptors. */ - struct pb_slabs bindless_descriptor_slabs; - /* Bindless descriptors. */ - struct util_dynarray bindless_descriptors; + struct si_descriptors bindless_descriptors; + struct util_idalloc bindless_used_slots; + unsigned num_bindless_descriptors; bool bindless_descriptors_dirty; + bool graphics_bindless_pointer_dirty; + bool compute_bindless_pointer_dirty; /* Allocated bindless handles */ struct hash_table *tex_handles; @@ -449,6 +543,15 @@ struct si_context { /* Bindless state */ bool uses_bindless_samplers; bool uses_bindless_images; + + /* MSAA sample locations. + * The first index is the sample index. + * The second index is the coordinate: X, Y. */ + float sample_locations_1x[1][2]; + float sample_locations_2x[2][2]; + float sample_locations_4x[4][2]; + float sample_locations_8x[8][2]; + float sample_locations_16x[16][2]; }; /* cik_sdma.c */ @@ -456,8 +559,7 @@ void cik_init_sdma_functions(struct si_context *sctx); /* si_blit.c */ void si_init_blit_functions(struct si_context *sctx); -void si_decompress_graphics_textures(struct si_context *sctx); -void si_decompress_compute_textures(struct si_context *sctx); +void si_decompress_textures(struct si_context *sctx, unsigned shader_mask); void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -484,9 +586,14 @@ void si_copy_buffer(struct si_context *sctx, unsigned user_flags); void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size); +void cik_emit_prefetch_L2(struct si_context *sctx); void si_init_cp_dma_functions(struct si_context *sctx); /* si_debug.c */ +void si_auto_log_cs(void *data, struct u_log_context *log); +void si_log_hw_flush(struct si_context *sctx); +void si_log_draw_state(struct si_context *sctx, struct u_log_context *log); +void si_log_compute_state(struct si_context *sctx, struct u_log_context *log); void si_init_debug_functions(struct si_context *sctx); void si_check_vm_faults(struct r600_common_context *ctx, struct radeon_saved_cs *saved, enum ring_type ring); @@ -496,6 +603,7 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary); void si_init_dma_functions(struct si_context *sctx); /* si_hw_context.c */ +void si_destroy_saved_cs(struct si_saved_cs *scs); void si_context_gfx_flush(void *context, unsigned flags, struct pipe_fence_handle **fence); void si_begin_new_cs(struct si_context *ctx); @@ -514,6 +622,11 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context, struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe, const struct pipe_video_buffer *tmpl); +/* si_viewport.c */ +void si_update_vs_viewport_state(struct si_context *ctx); +void si_init_viewport_functions(struct si_context *ctx); + + /* * common helpers */ @@ -578,6 +691,12 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx) return vs->current ? vs->current : NULL; } +static inline bool si_get_strmout_en(struct si_context *sctx) +{ + return sctx->streamout.streamout_enabled || + sctx->streamout.prims_gen_query_enabled; +} + static inline unsigned si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size) { @@ -593,4 +712,57 @@ si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size) return MIN2(alignment, tcc_cache_line_size); } +static inline void +si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src) +{ + if (pipe_reference(&(*dst)->reference, &src->reference)) + si_destroy_saved_cs(*dst); + + *dst = src; +} + +static inline void +si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, + bool shaders_read_metadata) +{ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_INV_VMEM_L1; + + if (sctx->b.chip_class >= GFX9) { + /* Single-sample color is coherent with shaders on GFX9, but + * L2 metadata must be flushed if shaders read metadata. + * (DCC, CMASK). + */ + if (num_samples >= 2) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + else if (shaders_read_metadata) + sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA; + } else { + /* SI-CI-VI */ + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } +} + +static inline void +si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, + bool include_stencil, bool shaders_read_metadata) +{ + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB | + SI_CONTEXT_INV_VMEM_L1; + + if (sctx->b.chip_class >= GFX9) { + /* Single-sample depth (not stencil) is coherent with shaders + * on GFX9, but L2 metadata must be flushed if shaders read + * metadata. + */ + if (num_samples >= 2 || include_stencil) + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + else if (shaders_read_metadata) + sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA; + } else { + /* SI-CI-VI */ + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; + } +} + #endif