radeonsi: add a debug flag for unsafe math LLVM optimizations
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.h
index 44ab67537b135b9942c8870a1f9f49ce2f22fa83..c0e4282c23f899b7776aca2681f6c8f644a5597f 100644 (file)
@@ -48,6 +48,7 @@
 #define R600_RESOURCE_FLAG_TRANSFER            (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH       (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 #define R600_RESOURCE_FLAG_FORCE_TILING                (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
+#define R600_RESOURCE_FLAG_DISABLE_DCC         (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
 
 #define R600_CONTEXT_STREAMOUT_FLUSH           (1u << 0)
 /* Pipeline & streamout query controls. */
@@ -77,6 +78,8 @@
 #define DBG_NO_TGSI            (1 << 13)
 #define DBG_NO_ASM             (1 << 14)
 #define DBG_PREOPT_IR          (1 << 15)
+/* gaps */
+#define DBG_TEST_DMA           (1 << 20)
 /* Bits 21-31 are reserved for the r600g driver. */
 /* features */
 #define DBG_NO_ASYNC_DMA       (1llu << 32)
 #define DBG_SI_SCHED           (1llu << 46)
 #define DBG_MONOLITHIC_SHADERS (1llu << 47)
 #define DBG_NO_CE              (1llu << 48)
+#define DBG_UNSAFE_MATH                (1llu << 49)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 #define R600_MAX_VIEWPORTS        16
 
+enum r600_coherency { /* passed as the "coher" argument of clear_buffer() and r600_screen_clear_buffer() */
+       R600_COHERENCY_NONE, /* no cache flushes needed */
+       R600_COHERENCY_SHADER, /* NOTE(review): presumably keeps the write coherent with shader accesses -- confirm */
+       R600_COHERENCY_CB_META, /* NOTE(review): presumably keeps the write coherent with color-buffer metadata -- confirm */
+};
+
 #ifdef PIPE_ARCH_BIG_ENDIAN
 #define R600_BIG_ENDIAN 1
 #else
@@ -228,6 +238,7 @@ struct r600_texture {
        struct r600_resource            resource;
 
        uint64_t                        size;
+       unsigned                        num_level0_transfers;
        bool                            is_depth;
        unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
        unsigned                        stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
@@ -242,6 +253,7 @@ struct r600_texture {
        uint64_t                        dcc_offset; /* 0 = disabled */
        unsigned                        cb_color_info; /* fast clear enable bit */
        unsigned                        color_clear_value[2];
+       unsigned                        last_msaa_resolve_target_micro_mode;
 
        /* Depth buffer compression and fast clear. */
        struct r600_htile_info          htile;
@@ -252,10 +264,16 @@ struct r600_texture {
        uint8_t                         stencil_clear_value;
 
        bool                            non_disp_tiling; /* R600-Cayman only */
+
+       /* Counter that should be non-zero if the texture is bound to a
+        * framebuffer. Implemented in radeonsi only.
+        */
+       uint32_t                        framebuffers_bound;
 };
 
 struct r600_surface {
        struct pipe_surface             base;
+       const struct radeon_surf_level  *level_info;
 
        bool color_initialized;
        bool depth_initialized;
@@ -273,7 +291,6 @@ struct r600_surface {
        unsigned cb_color_dim;          /* EG only */
        unsigned cb_color_pitch;        /* EG and later */
        unsigned cb_color_slice;        /* EG and later */
-       unsigned cb_dcc_base;           /* VI and later */
        unsigned cb_color_attrib;       /* EG and later */
        unsigned cb_dcc_control;        /* VI and later */
        unsigned cb_color_fmask;        /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
@@ -355,9 +372,19 @@ struct r600_common_screen {
         */
        unsigned                        compressed_colortex_counter;
 
+       /* Atomically increment this counter when an existing texture's
+        * backing buffer or tile mode parameters have changed in a way that
+        * requires recomputation of shader descriptors.
+        */
+       unsigned                        dirty_tex_descriptor_counter;
+
        void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
                                      struct r600_texture *rtex,
                                      struct radeon_bo_metadata *md);
+
+       void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
+                                   struct r600_texture *rtex,
+                                   struct radeon_bo_metadata *md);
 };
 
 /* This encapsulates a state or an operation which can be emitted into the GPU
@@ -447,9 +474,10 @@ struct r600_common_context {
        unsigned                        gpu_reset_counter;
        unsigned                        last_dirty_fb_counter;
        unsigned                        last_compressed_colortex_counter;
+       unsigned                        last_dirty_tex_descriptor_counter;
 
        struct u_upload_mgr             *uploader;
-       struct u_suballocator           *allocator_so_filled_size;
+       struct u_suballocator           *allocator_zeroed_memory;
        struct util_slab_mempool        pool_transfers;
 
        /* Current unaccounted memory usage. */
@@ -478,6 +506,11 @@ struct r600_common_context {
        unsigned                        max_db; /* for OQ */
        /* Misc stats. */
        unsigned                        num_draw_calls;
+       unsigned                        num_spill_draw_calls;
+       unsigned                        num_compute_calls;
+       unsigned                        num_spill_compute_calls;
+       unsigned                        num_dma_calls;
+       uint64_t                        num_alloc_tex_transfer_bytes;
 
        /* Render condition. */
        struct r600_atom                render_cond_atom;
@@ -513,7 +546,7 @@ struct r600_common_context {
 
        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                             uint64_t offset, uint64_t size, unsigned value,
-                            bool is_framebuffer);
+                            enum r600_coherency coher);
 
        void (*blit_decompress_depth)(struct pipe_context *ctx,
                                      struct r600_texture *texture,
@@ -549,8 +582,7 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
                                       unsigned usage);
 bool r600_init_resource(struct r600_common_screen *rscreen,
                        struct r600_resource *res,
-                       uint64_t size, unsigned alignment,
-                       bool use_reusable_pool);
+                       uint64_t size, unsigned alignment);
 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templ,
                                         unsigned alignment);
@@ -585,11 +617,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
                          unsigned processor);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
                              uint64_t offset, uint64_t size, unsigned value,
-                             bool is_framebuffer);
+                             enum r600_coherency coher);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
                                                  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw);
+void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+                        struct r600_resource *dst, struct r600_resource *src);
+void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
 
 /* r600_gpu_load.c */
 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
@@ -617,7 +651,17 @@ void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
                                             unsigned type, int diff);
 void r600_streamout_init(struct r600_common_context *rctx);
 
+/* r600_test_dma.c */
+void r600_test_dma(struct r600_common_screen *rscreen);
+
 /* r600_texture.c */
+bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
+                               struct r600_texture *rdst,
+                               unsigned dst_level, unsigned dstx,
+                               unsigned dsty, unsigned dstz,
+                               struct r600_texture *rsrc,
+                               unsigned src_level,
+                               const struct pipe_box *src_box);
 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
                                 struct r600_texture *rtex,
                                 unsigned nr_samples,
@@ -635,13 +679,16 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
                                                struct pipe_resource *texture,
                                                const struct pipe_surface *templ,
                                                unsigned width, unsigned height);
-unsigned r600_translate_colorswap(enum pipe_format format);
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
+void vi_dcc_clear_level(struct r600_common_context *rctx,
+                       struct r600_texture *rtex,
+                       unsigned level, unsigned clear_value);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
                                   unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color);
-void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
+bool r600_texture_disable_dcc(struct r600_common_screen *rscreen,
                              struct r600_texture *rtex);
 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
 void r600_init_context_texture_functions(struct r600_common_context *rctx);
@@ -757,9 +804,9 @@ r600_get_sampler_view_priority(struct r600_resource *res)
 
 /* For MSAA sample positions: packs eight 4-bit values into one word, s0x in bits 0-3 up through s3y in bits 28-31. */
 #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
-       (((s0x) & 0xf) | (((s0y) & 0xf) << 4) |            \
-       (((s1x) & 0xf) << 8) | (((s1y) & 0xf) << 12) |     \
-       (((s2x) & 0xf) << 16) | (((s2y) & 0xf) << 20) |    \
-        (((s3x) & 0xf) << 24) | (((s3y) & 0xf) << 28))
+       (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |                  \
+       (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |         \
+       (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |        \
+        (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
 
 #endif