radeonsi: add an si_set_rw_shader_buffer convenience function
[mesa.git] / src / gallium / drivers / radeonsi / si_pipe.h
index dad3029bc3189c0daca112139e562bf3ac9b97ec..179671e8871d8b2ad3398ed087665d5c4b604b9c 100644 (file)
 #define SI_BASE_VERTEX_UNKNOWN         INT_MIN
 #define SI_RESTART_INDEX_UNKNOWN       INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES       8
+#define SI_MAX_POINT_SIZE              2048
 #define SI_GS_PER_ES                   128
 /* Alignment for optimal CP DMA performance. */
 #define SI_CPDMA_ALIGNMENT             32
 
+/* Tunables for compute-based clear_buffer and copy_buffer: */
+#define SI_COMPUTE_CLEAR_DW_PER_THREAD 4
+#define SI_COMPUTE_COPY_DW_PER_THREAD  4
+#define SI_COMPUTE_DST_CACHE_POLICY    L2_STREAM
+
 /* Pipeline & streamout query controls. */
 #define SI_CONTEXT_START_PIPELINE_STATS        (1 << 0)
 #define SI_CONTEXT_STOP_PIPELINE_STATS (1 << 1)
 
 #define SI_RESOURCE_FLAG_TRANSFER      (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
-#define SI_RESOURCE_FLAG_FORCE_TILING  (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
+#define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define SI_RESOURCE_FLAG_DISABLE_DCC   (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
 #define SI_RESOURCE_FLAG_UNMAPPABLE    (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
 #define SI_RESOURCE_FLAG_READ_ONLY     (PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
 #define SI_RESOURCE_FLAG_32BIT         (PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
+#define SI_RESOURCE_FLAG_SO_FILLED_SIZE        (PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
 
 /* Debug flags. */
 enum {
@@ -167,11 +174,26 @@ enum {
        DBG_TEST_VMFAULT_SHADER,
        DBG_TEST_DMA_PERF,
        DBG_TEST_GDS,
+       DBG_TEST_GDS_MM,
+       DBG_TEST_GDS_OA_MM,
 };
 
 #define DBG_ALL_SHADERS                (((1 << (DBG_CS + 1)) - 1))
 #define DBG(name)              (1ull << DBG_##name)
 
+enum si_cache_policy {
+       L2_BYPASS,
+       L2_STREAM, /* same as SLC=1 */
+       L2_LRU,    /* same as SLC=0 */
+};
+
+enum si_coherency {
+       SI_COHERENCY_NONE, /* no cache flushes needed */
+       SI_COHERENCY_SHADER,
+       SI_COHERENCY_CB_META,
+       SI_COHERENCY_CP,
+};
+
 struct si_compute;
 struct hash_table;
 struct u_suballocator;
@@ -405,6 +427,9 @@ struct si_screen {
        uint64_t                        debug_flags;
        char                            renderer_string[183];
 
+       unsigned                        pa_sc_raster_config;
+       unsigned                        pa_sc_raster_config_1;
+       unsigned                        se_tile_repeat;
        unsigned                        gs_table_depth;
        unsigned                        tess_offchip_block_dw_size;
        unsigned                        tess_offchip_ring_size;
@@ -422,6 +447,7 @@ struct si_screen {
        bool                            clear_db_cache_before_clear;
        bool                            has_msaa_sample_loc_bug;
        bool                            has_ls_vgpr_init_bug;
+       bool                            has_dcc_constant_encode;
        bool                            dpbb_allowed;
        bool                            dfsm_allowed;
        bool                            llvm_has_working_vgpr_indexing;
@@ -594,17 +620,26 @@ struct si_framebuffer {
        ubyte                           color_is_int8;
        ubyte                           color_is_int10;
        ubyte                           dirty_cbufs;
+       ubyte                           dcc_overwrite_combiner_watermark;
        bool                            dirty_zsbuf;
        bool                            any_dst_linear;
        bool                            CB_has_shader_readable_metadata;
        bool                            DB_has_shader_readable_metadata;
 };
 
+enum si_quant_mode {
+       /* This is the list we want to support. */
+       SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,
+       SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,
+       SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH,
+};
+
 struct si_signed_scissor {
        int minx;
        int miny;
        int maxx;
        int maxy;
+       enum si_quant_mode quant_mode;
 };
 
 struct si_scissors {
@@ -762,6 +797,8 @@ struct si_context {
        void                            *vs_blit_color;
        void                            *vs_blit_color_layered;
        void                            *vs_blit_texcoord;
+       void                            *cs_clear_buffer;
+       void                            *cs_copy_buffer;
        struct si_screen                *screen;
        struct pipe_debug_callback      debug;
        struct ac_llvm_compiler         compiler; /* only non-threaded compilation */
@@ -962,11 +999,14 @@ struct si_context {
        /* MSAA sample locations.
         * The first index is the sample index.
         * The second index is the coordinate: X, Y. */
-       float                   sample_locations_1x[1][2];
-       float                   sample_locations_2x[2][2];
-       float                   sample_locations_4x[4][2];
-       float                   sample_locations_8x[8][2];
-       float                   sample_locations_16x[16][2];
+       struct {
+               float                   x1[1][2];
+               float                   x2[2][2];
+               float                   x4[4][2];
+               float                   x8[8][2];
+               float                   x16[16][2];
+       } sample_positions;
+       struct pipe_resource *sample_pos_buffer;
 
        /* Misc stats. */
        unsigned                        num_draw_calls;
@@ -988,6 +1028,7 @@ struct si_context {
        unsigned                        num_resident_handles;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
+       unsigned                        context_roll_counter;
 
        /* Queries. */
        /* Maintain the list of active queries for pausing between IBs. */
@@ -1096,6 +1137,17 @@ void vi_dcc_clear_level(struct si_context *sctx,
                        unsigned level, unsigned clear_value);
 void si_init_clear_functions(struct si_context *sctx);
 
+/* si_compute_blit.c */
+unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
+                           enum si_cache_policy cache_policy);
+void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
+                    uint64_t offset, uint64_t size, uint32_t *clear_value,
+                    uint32_t clear_value_size, enum si_coherency coher);
+void si_copy_buffer(struct si_context *sctx,
+                   struct pipe_resource *dst, struct pipe_resource *src,
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size);
+void si_init_compute_blit_functions(struct si_context *sctx);
+
 /* si_cp_dma.c */
 #define SI_CPDMA_SKIP_CHECK_CS_SPACE   (1 << 0) /* don't call need_cs_space */
 #define SI_CPDMA_SKIP_SYNC_AFTER       (1 << 1) /* don't wait for DMA after the copy */
@@ -1108,39 +1160,20 @@ void si_init_clear_functions(struct si_context *sctx);
                           SI_CPDMA_SKIP_GFX_SYNC | \
                           SI_CPDMA_SKIP_BO_LIST_UPDATE)
 
-enum si_cache_policy {
-       L2_BYPASS,
-       L2_STREAM, /* same as SLC=1 */
-       L2_LRU,    /* same as SLC=0 */
-};
-
-enum si_coherency {
-       SI_COHERENCY_NONE, /* no cache flushes needed */
-       SI_COHERENCY_SHADER,
-       SI_COHERENCY_CB_META,
-};
-
 void si_cp_dma_wait_for_idle(struct si_context *sctx);
-void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
-                           uint64_t offset, uint64_t size, unsigned value,
-                           enum si_coherency coher,
-                           enum si_cache_policy cache_policy);
-void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
-                    uint64_t offset, uint64_t size, unsigned value,
-                    enum si_coherency coher);
+void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,
+                           struct pipe_resource *dst, uint64_t offset,
+                           uint64_t size, unsigned value, unsigned user_flags,
+                           enum si_coherency coher, enum si_cache_policy cache_policy);
 void si_cp_dma_copy_buffer(struct si_context *sctx,
                           struct pipe_resource *dst, struct pipe_resource *src,
                           uint64_t dst_offset, uint64_t src_offset, unsigned size,
                           unsigned user_flags, enum si_coherency coher,
                           enum si_cache_policy cache_policy);
-void si_copy_buffer(struct si_context *sctx,
-                   struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size);
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
                              uint64_t offset, unsigned size);
 void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only);
 void si_test_gds(struct si_context *sctx);
-void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */
 void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,