X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_pipe.h;h=431d8a3a4290f34e8cd3e6e8f0e7623bef8a35bb;hb=c485b47383337af02601ab41ad63cc8dbd2fd3ee;hp=91ccbea6ba3af44e63a38a98b4a81761f51de68f;hpb=107f4d3538e6eeab396bf41a4d4334950adf81ac;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 91ccbea6ba3..431d8a3a429 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -26,9 +26,7 @@ #ifndef SI_PIPE_H #define SI_PIPE_H -#include "si_state.h" - -#include +#include "si_shader.h" #ifdef PIPE_ARCH_BIG_ENDIAN #define SI_BIG_ENDIAN 1 @@ -43,6 +41,8 @@ #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 #define SI_GS_PER_ES 128 +/* Alignment for optimal CP DMA performance. */ +#define SI_CPDMA_ALIGNMENT 32 /* Instruction cache. */ #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0) @@ -52,29 +52,19 @@ #define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2) /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ #define SI_CONTEXT_INV_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 3) +/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't + * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ +#define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4) +/* gaps */ /* Framebuffer caches. */ -#define SI_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4) -#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5) -#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6) -#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7) +#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 7) +#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 8) /* Engine synchronization. */ -#define SI_CONTEXT_VS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8) -#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9) -#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10) -#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11) -#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 12) -/* Compute only. */ -#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */ -#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 14) - -#define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \ - SI_CONTEXT_FLUSH_AND_INV_CB_META | \ - SI_CONTEXT_FLUSH_AND_INV_DB | \ - SI_CONTEXT_FLUSH_AND_INV_DB_META) - -#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) -#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000) -#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) +#define SI_CONTEXT_VS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9) +#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10) +#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11) +#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 12) +#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 13) #define SI_MAX_BORDER_COLORS 4096 @@ -85,14 +75,20 @@ struct u_suballocator; struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; + unsigned tess_offchip_block_dw_size; + bool has_distributed_tess; + bool has_draw_indirect_multi; + bool has_ds_bpermute; + bool has_msaa_sample_loc_bug; /* Whether shaders are monolithic (1-part) or separate (3-part). */ bool use_monolithic_shaders; + bool record_llvm_ir; - pipe_mutex shader_parts_mutex; + mtx_t shader_parts_mutex; struct si_shader_part *vs_prologs; - struct si_shader_part *vs_epilogs; struct si_shader_part *tcs_epilogs; + struct si_shader_part *gs_prologs; struct si_shader_part *ps_prologs; struct si_shader_part *ps_epilogs; @@ -108,8 +104,12 @@ struct si_screen { * - GS and CS aren't cached, but it's certainly possible to cache * those as well. */ - pipe_mutex shader_cache_mutex; + mtx_t shader_cache_mutex; struct hash_table *shader_cache; + + /* Shader compiler queue for multithreaded compilation. */ + struct util_queue shader_compiler_queue; + LLVMTargetMachineRef tm[4]; /* used by the queue only */ }; struct si_blend_color { @@ -119,33 +119,44 @@ struct si_blend_color { struct si_sampler_view { struct pipe_sampler_view base; - struct list_head list; /* [0..7] = image descriptor * [4..7] = buffer descriptor */ uint32_t state[8]; uint32_t fmask_state[8]; + const struct legacy_surf_level *base_level_info; + unsigned base_level; + unsigned block_width; bool is_stencil_sampler; + bool dcc_incompatible; }; +#define SI_SAMPLER_STATE_MAGIC 0x34f1c35a + struct si_sampler_state { +#ifdef DEBUG + unsigned magic; +#endif uint32_t val[4]; }; struct si_cs_shader_state { struct si_compute *program; + struct si_compute *emitted_program; + unsigned offset; bool initialized; + bool uses_scratch; }; struct si_textures_info { struct si_sampler_views views; - uint64_t depth_texture_mask; /* which textures are depth */ - uint64_t compressed_colortex_mask; + uint32_t depth_texture_mask; /* which textures are depth */ + uint32_t compressed_colortex_mask; }; struct si_images_info { - struct si_descriptors desc; struct pipe_image_view views[SI_NUM_IMAGES]; uint32_t compressed_colortex_mask; + unsigned enabled_mask; }; struct si_framebuffer { @@ -153,15 +164,18 @@ struct si_framebuffer { struct pipe_framebuffer_state state; unsigned nr_samples; unsigned log_samples; - unsigned cb0_is_integer; unsigned compressed_cb_mask; + unsigned colorbuf_enabled_4bit; unsigned spi_shader_col_format; unsigned spi_shader_col_format_alpha; unsigned spi_shader_col_format_blend; unsigned spi_shader_col_format_blend_alpha; - unsigned color_is_int8; /* bitmask */ + unsigned color_is_int8; + unsigned color_is_int10; unsigned dirty_cbufs; bool dirty_zsbuf; + bool any_dst_linear; + bool do_update_surf_dirtiness; }; struct si_clip_state { @@ -169,6 +183,11 @@ struct si_clip_state { struct pipe_clip_state state; }; +struct si_sample_locs { + struct r600_atom atom; + unsigned nr_samples; +}; + struct si_sample_mask { struct r600_atom atom; uint16_t sample_mask; @@ -183,6 +202,28 @@ struct si_shader_ctx_state { struct si_shader *current; }; +#define SI_NUM_VGT_PARAM_KEY_BITS 12 +#define SI_NUM_VGT_PARAM_STATES (1 << SI_NUM_VGT_PARAM_KEY_BITS) + +/* The IA_MULTI_VGT_PARAM key used to index the table of precomputed values. + * Some fields are set by state-change calls, most are set by draw_vbo. + */ +union si_vgt_param_key { + struct { + unsigned prim:4; + unsigned uses_instancing:1; + unsigned multi_instances_smaller_than_primgroup:1; + unsigned primitive_restart:1; + unsigned count_from_stream_output:1; + unsigned line_stipple_enabled:1; + unsigned uses_tess:1; + unsigned tess_uses_prim_id:1; + unsigned uses_gs:1; + unsigned _pad:32 - SI_NUM_VGT_PARAM_KEY_BITS; + } u; + uint32_t index; +}; + struct si_context { struct r600_common_context b; struct blitter_context *blitter; @@ -191,7 +232,6 @@ struct si_context { void *custom_blend_decompress; void *custom_blend_fastclear; void *custom_blend_dcc_decompress; - void *pstipple_sampler_state; struct si_screen *screen; struct radeon_winsys_cs *ce_ib; @@ -199,26 +239,28 @@ struct si_context { bool ce_need_synchronization; struct u_suballocator *ce_suballocator; - struct pipe_fence_handle *last_gfx_fence; struct si_shader_ctx_state fixed_func_tcs_shader; - LLVMTargetMachineRef tm; + LLVMTargetMachineRef tm; /* only non-threaded compilation */ bool gfx_flush_in_progress; + bool compute_is_busy; /* Atoms (direct states). */ union si_state_atoms atoms; unsigned dirty_atoms; /* mask */ /* PM4 states (precomputed immutable states) */ + unsigned dirty_states; union si_state queued; union si_state emitted; /* Atom declarations. */ - struct r600_atom cache_flush; + struct r600_atom prefetch_L2; struct si_framebuffer framebuffer; - struct r600_atom msaa_sample_locs; + struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; struct r600_atom cb_render_state; + unsigned last_cb_target_mask; struct si_blend_color blend_color; struct r600_atom clip_regs; struct si_clip_state clip_state; @@ -244,11 +286,16 @@ struct si_context { struct si_vertex_element *vertex_elements; unsigned sprite_coord_enable; bool flatshade; + bool do_update_shaders; /* shader descriptors */ struct si_descriptors vertex_buffers; + struct si_descriptors descriptors[SI_NUM_DESCS]; + unsigned descriptors_dirty; + unsigned shader_pointers_dirty; + unsigned compressed_tex_shader_mask; + struct si_buffer_resources rw_buffers; struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; - struct si_buffer_resources rw_buffers[SI_NUM_SHADERS]; struct si_buffer_resources shader_buffers[SI_NUM_SHADERS]; struct si_textures_info samplers[SI_NUM_SHADERS]; struct si_images_info images[SI_NUM_SHADERS]; @@ -258,6 +305,7 @@ struct si_context { struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tf_ring; + struct pipe_resource *tess_offchip_ring; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ struct r600_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ @@ -265,7 +313,7 @@ struct si_context { /* Vertex and index buffers. */ bool vertex_buffers_dirty; - struct pipe_index_buffer index_buffer; + bool vertex_buffer_pointer_dirty; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; /* MSAA config state. */ @@ -286,56 +334,63 @@ struct si_context { bool occlusion_queries_disabled; /* Emitted draw state. */ + int last_index_size; int last_base_vertex; int last_start_instance; + int last_drawid; int last_sh_base_reg; int last_primitive_restart_en; int last_restart_index; int last_gs_out_prim; int last_prim; int last_multi_vgt_param; - int last_ls_hs_config; int last_rast_prim; unsigned last_sc_line_stipple; - int current_rast_prim; /* primitive type after TES, GS */ - unsigned last_gsvs_itemsize; + unsigned current_vs_state; + unsigned last_vs_state; + enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */ + bool gs_tri_strip_adj_fix; /* Scratch buffer */ + struct r600_atom scratch_state; struct r600_resource *scratch_buffer; - boolean emit_scratch_reloc; unsigned scratch_waves; unsigned spi_tmpring_size; + struct r600_resource *compute_scratch_buffer; + /* Emitted derived tessellation state. */ - struct si_shader *last_ls; /* local shader (VS) */ + /* Local shader (VS), or HS if LS-HS are merged. */ + struct si_shader *last_ls; struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; + unsigned last_num_patches; /* Debug state. */ bool is_debug; - uint32_t *last_ib; - unsigned last_ib_dw_size; + struct radeon_saved_cs last_gfx; struct r600_resource *last_trace_buf; struct r600_resource *trace_buf; unsigned trace_id; uint64_t dmesg_timestamp; - unsigned last_bo_count; - struct radeon_bo_list_item *last_bo_list; + unsigned apitrace_call_number; + + /* Other state */ + bool need_check_render_feedback; + + /* Precomputed IA_MULTI_VGT_PARAM */ + union si_vgt_param_key ia_multi_vgt_param_key; + unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES]; }; /* cik_sdma.c */ -void cik_sdma_copy(struct pipe_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box); +void cik_init_sdma_functions(struct si_context *sctx); /* si_blit.c */ void si_init_blit_functions(struct si_context *sctx); -void si_decompress_textures(struct si_context *sctx); +void si_decompress_graphics_textures(struct si_context *sctx); +void si_decompress_compute_textures(struct si_context *sctx); void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dst_level, @@ -345,25 +400,33 @@ void si_resource_copy_region(struct pipe_context *ctx, const struct pipe_box *src_box); /* si_cp_dma.c */ +#define SI_CPDMA_SKIP_CHECK_CS_SPACE (1 << 0) /* don't call need_cs_space */ +#define SI_CPDMA_SKIP_SYNC_AFTER (1 << 1) /* don't wait for DMA after the copy */ +#define SI_CPDMA_SKIP_SYNC_BEFORE (1 << 2) /* don't wait for DMA before the copy (RAW hazards) */ +#define SI_CPDMA_SKIP_GFX_SYNC (1 << 3) /* don't flush caches and don't wait for PS/CS */ +#define SI_CPDMA_SKIP_BO_LIST_UPDATE (1 << 4) /* don't update the BO list */ +#define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \ + SI_CPDMA_SKIP_SYNC_AFTER | \ + SI_CPDMA_SKIP_SYNC_BEFORE | \ + SI_CPDMA_SKIP_GFX_SYNC | \ + SI_CPDMA_SKIP_BO_LIST_UPDATE) + void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset, unsigned size, - bool is_framebuffer); + unsigned user_flags); +void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, + uint64_t offset, unsigned size); void si_init_cp_dma_functions(struct si_context *sctx); /* si_debug.c */ void si_init_debug_functions(struct si_context *sctx); -void si_check_vm_faults(struct si_context *sctx); -bool si_replace_shader(unsigned num, struct radeon_shader_binary *binary); +void si_check_vm_faults(struct r600_common_context *ctx, + struct radeon_saved_cs *saved, enum ring_type ring); +bool si_replace_shader(unsigned num, struct ac_shader_binary *binary); /* si_dma.c */ -void si_dma_copy(struct pipe_context *ctx, - struct pipe_resource *dst, - unsigned dst_level, - unsigned dstx, unsigned dsty, unsigned dstz, - struct pipe_resource *src, - unsigned src_level, - const struct pipe_box *src_box); +void si_init_dma_functions(struct si_context *sctx); /* si_hw_context.c */ void si_context_gfx_flush(void *context, unsigned flags, @@ -388,28 +451,17 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe, * common helpers */ -static inline struct r600_resource * -si_resource_create_custom(struct pipe_screen *screen, - unsigned usage, unsigned size) -{ - assert(size); - return r600_resource(pipe_buffer_create(screen, - PIPE_BIND_CUSTOM, usage, size)); -} - static inline void si_invalidate_draw_sh_constants(struct si_context *sctx) { sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN; - sctx->last_start_instance = -1; /* reset to an unknown value */ - sctx->last_sh_base_reg = -1; /* reset to an unknown value */ } static inline void si_set_atom_dirty(struct si_context *sctx, struct r600_atom *atom, bool dirty) { - unsigned bit = 1 << (atom->id - 1); + unsigned bit = 1 << atom->id; if (dirty) sctx->dirty_atoms |= bit; @@ -417,6 +469,15 @@ si_set_atom_dirty(struct si_context *sctx, sctx->dirty_atoms &= ~bit; } +static inline bool +si_is_atom_dirty(struct si_context *sctx, + struct r600_atom *atom) +{ + unsigned bit = 1 << atom->id; + + return sctx->dirty_atoms & bit; +} + static inline void si_mark_atom_dirty(struct si_context *sctx, struct r600_atom *atom) @@ -424,4 +485,41 @@ si_mark_atom_dirty(struct si_context *sctx, si_set_atom_dirty(sctx, atom, true); } +static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) +{ + if (sctx->gs_shader.cso) + return &sctx->gs_shader.cso->info; + else if (sctx->tes_shader.cso) + return &sctx->tes_shader.cso->info; + else if (sctx->vs_shader.cso) + return &sctx->vs_shader.cso->info; + else + return NULL; +} + +static inline struct si_shader* si_get_vs_state(struct si_context *sctx) +{ + if (sctx->gs_shader.current) + return sctx->gs_shader.cso->gs_copy_shader; + else if (sctx->tes_shader.current) + return sctx->tes_shader.current; + else + return sctx->vs_shader.current; +} + +static inline unsigned +si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size) +{ + unsigned alignment, tcc_cache_line_size; + + /* If the upload size is less than the cache line size (e.g. 16, 32), + * the whole thing will fit into a cache line if we align it to its size. + * The idea is that multiple small uploads can share a cache line. + * If the upload size is greater, align it to the cache line size. + */ + alignment = util_next_power_of_two(upload_size); + tcc_cache_line_size = sctx->screen->b.info.tcc_cache_line_size; + return MIN2(alignment, tcc_cache_line_size); +} + #endif