r600/radeonsi: enable glsl/tgsi on-disk cache
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.h
index 54991e87f4cf01f315199f1ca79b5871ebbdf0f0..94cf0fcc055647f338eab0b3d85e8e47691142a3 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "radeon/radeon_winsys.h"
 
+#include "util/disk_cache.h"
 #include "util/u_blitter.h"
 #include "util/list.h"
 #include "util/u_range.h"
@@ -49,6 +50,7 @@
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH       (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 #define R600_RESOURCE_FLAG_FORCE_TILING                (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define R600_RESOURCE_FLAG_DISABLE_DCC         (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
+#define R600_RESOURCE_FLAG_UNMAPPABLE          (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
 
 #define R600_CONTEXT_STREAMOUT_FLUSH           (1u << 0)
 /* Pipeline & streamout query controls. */
@@ -79,6 +81,7 @@
 #define DBG_NO_ASM             (1 << 14)
 #define DBG_PREOPT_IR          (1 << 15)
 #define DBG_CHECK_IR           (1 << 16)
+#define DBG_NO_OPT_VARIANT     (1 << 17)
 /* gaps */
 #define DBG_TEST_DMA           (1 << 20)
 /* Bits 21-31 are reserved for the r600g driver. */
 #define R600_MAP_BUFFER_ALIGNMENT 64
 #define R600_MAX_VIEWPORTS        16
 
+#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
+
 enum r600_coherency {
        R600_COHERENCY_NONE, /* no cache flushes needed */
        R600_COHERENCY_SHADER,
@@ -230,26 +235,16 @@ struct r600_cmask_info {
        uint64_t offset;
        uint64_t size;
        unsigned alignment;
-       unsigned pitch;
-       unsigned height;
-       unsigned xalign;
-       unsigned yalign;
        unsigned slice_tile_max;
        unsigned base_address_reg;
 };
 
-struct r600_htile_info {
-       unsigned pitch;
-       unsigned height;
-       unsigned xalign;
-       unsigned yalign;
-};
-
 struct r600_texture {
        struct r600_resource            resource;
 
        uint64_t                        size;
        unsigned                        num_level0_transfers;
+       enum pipe_format                db_render_format;
        bool                            is_depth;
        bool                            db_compatible;
        bool                            can_sample_z;
@@ -269,8 +264,8 @@ struct r600_texture {
        unsigned                        last_msaa_resolve_target_micro_mode;
 
        /* Depth buffer compression and fast clear. */
-       struct r600_htile_info          htile;
        struct r600_resource            *htile_buffer;
+       bool                            tc_compatible_htile;
        bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
        bool                            stencil_cleared; /* if it was cleared at least once */
@@ -314,7 +309,6 @@ struct r600_texture {
 
 struct r600_surface {
        struct pipe_surface             base;
-       const struct radeon_surf_level  *level_info;
 
        bool color_initialized;
        bool depth_initialized;
@@ -323,6 +317,7 @@ struct r600_surface {
        bool alphatest_bypass;
        bool export_16bpc;
        bool color_is_int8;
+       bool color_is_int10;
 
        /* Color registers. */
        unsigned cb_color_info;
@@ -360,6 +355,47 @@ struct r600_surface {
        unsigned db_preload_control;    /* EG and later */
 };
 
+struct r600_mmio_counter {
+       unsigned busy;
+       unsigned idle;
+};
+
+union r600_mmio_counters {
+       struct {
+               /* For global GPU load including SDMA. */
+               struct r600_mmio_counter gpu;
+
+               /* GRBM_STATUS */
+               struct r600_mmio_counter spi;
+               struct r600_mmio_counter gui;
+               struct r600_mmio_counter ta;
+               struct r600_mmio_counter gds;
+               struct r600_mmio_counter vgt;
+               struct r600_mmio_counter ia;
+               struct r600_mmio_counter sx;
+               struct r600_mmio_counter wd;
+               struct r600_mmio_counter bci;
+               struct r600_mmio_counter sc;
+               struct r600_mmio_counter pa;
+               struct r600_mmio_counter db;
+               struct r600_mmio_counter cp;
+               struct r600_mmio_counter cb;
+
+               /* SRBM_STATUS2 */
+               struct r600_mmio_counter sdma;
+
+               /* CP_STAT */
+               struct r600_mmio_counter pfp;
+               struct r600_mmio_counter meq;
+               struct r600_mmio_counter me;
+               struct r600_mmio_counter surf_sync;
+               struct r600_mmio_counter dma;
+               struct r600_mmio_counter scratch_ram;
+               struct r600_mmio_counter ce;
+       } named;
+       unsigned array[0];
+};
+
 struct r600_common_screen {
        struct pipe_screen              b;
        struct radeon_winsys            *ws;
@@ -370,6 +406,8 @@ struct r600_common_screen {
        bool                            has_cp_dma;
        bool                            has_streamout;
 
+       struct disk_cache               *disk_shader_cache;
+
        struct slab_parent_pool         pool_transfers;
 
        /* Texture filter settings. */
@@ -388,12 +426,12 @@ struct r600_common_screen {
         * are loading shaders on demand. This is a monotonic counter.
         */
        unsigned                        num_shaders_created;
+       unsigned                        num_shader_cache_hits;
 
        /* GPU load thread. */
        pipe_mutex                      gpu_load_mutex;
        pipe_thread                     gpu_load_thread;
-       unsigned                        gpu_load_counter_busy;
-       unsigned                        gpu_load_counter_idle;
+       union r600_mmio_counters        mmio_counters;
        volatile unsigned               gpu_load_stop_thread; /* bool */
 
        char                            renderer_string[100];
@@ -401,12 +439,14 @@ struct r600_common_screen {
        /* Performance counters. */
        struct r600_perfcounters        *perfcounters;
 
-       /* If pipe_screen wants to re-emit the framebuffer state of all
-        * contexts, it should atomically increment this. Each context will
-        * compare this with its own last known value of the counter before
-        * drawing and re-emit the framebuffer state accordingly.
+       /* If pipe_screen wants to recompute and re-emit the framebuffer,
+        * sampler, and image states of all contexts, it should atomically
+        * increment this.
+        *
+        * Each context will compare this with its own last known value of
+        * the counter before drawing and re-emit the states accordingly.
         */
-       unsigned                        dirty_fb_counter;
+       unsigned                        dirty_tex_counter;
 
        /* Atomically increment this counter when an existing texture's
         * metadata is enabled or disabled in a way that requires changing
@@ -414,12 +454,6 @@ struct r600_common_screen {
         */
        unsigned                        compressed_colortex_counter;
 
-       /* Atomically increment this counter when an existing texture's
-        * backing buffer or tile mode parameters have changed that requires
-        * recomputation of shader descriptors.
-        */
-       unsigned                        dirty_tex_descriptor_counter;
-
        struct {
                /* Context flags to set so that all writes from earlier jobs
                 * in the CP are seen by L2 clients.
@@ -538,11 +572,9 @@ struct r600_common_context {
        unsigned                        num_gfx_cs_flushes;
        unsigned                        initial_gfx_cs_size;
        unsigned                        gpu_reset_counter;
-       unsigned                        last_dirty_fb_counter;
+       unsigned                        last_dirty_tex_counter;
        unsigned                        last_compressed_colortex_counter;
-       unsigned                        last_dirty_tex_descriptor_counter;
 
-       struct u_upload_mgr             *uploader;
        struct u_suballocator           *allocator_zeroed_memory;
        struct slab_child_pool          pool_transfers;
 
@@ -568,18 +600,19 @@ struct r600_common_context {
        int                             num_perfect_occlusion_queries;
        struct list_head                active_queries;
        unsigned                        num_cs_dw_queries_suspend;
-       /* Additional hardware info. */
-       unsigned                        backend_mask;
-       unsigned                        max_db; /* for OQ */
        /* Misc stats. */
        unsigned                        num_draw_calls;
        unsigned                        num_spill_draw_calls;
        unsigned                        num_compute_calls;
        unsigned                        num_spill_compute_calls;
        unsigned                        num_dma_calls;
+       unsigned                        num_cp_dma_calls;
        unsigned                        num_vs_flushes;
        unsigned                        num_ps_flushes;
        unsigned                        num_cs_flushes;
+       unsigned                        num_fb_cache_flushes;
+       unsigned                        num_L2_invalidates;
+       unsigned                        num_L2_writebacks;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
 
@@ -632,6 +665,9 @@ struct r600_common_context {
                         unsigned src_level,
                         const struct pipe_box *src_box);
 
+       void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
+                                uint64_t offset, uint64_t size, unsigned value);
+
        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                             uint64_t offset, uint64_t size, unsigned value,
                             enum r600_coherency coher);
@@ -687,7 +723,7 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templ,
                                         unsigned alignment);
 struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
-                                                 unsigned bind,
+                                                 unsigned flags,
                                                  unsigned usage,
                                                  unsigned size,
                                                  unsigned alignment);
@@ -700,8 +736,11 @@ r600_invalidate_resource(struct pipe_context *ctx,
                         struct pipe_resource *resource);
 
 /* r600_common_pipe.c */
-void r600_gfx_write_fence(struct r600_common_context *ctx, struct r600_resource *buf,
-                         uint64_t va, uint32_t old_value, uint32_t new_value);
+void r600_gfx_write_event_eop(struct r600_common_context *ctx,
+                             unsigned event, unsigned event_flags,
+                             unsigned data_sel,
+                             struct r600_resource *buf, uint64_t va,
+                             uint32_t old_fence, uint32_t new_fence);
 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
 void r600_gfx_wait_fence(struct r600_common_context *ctx,
                         uint64_t va, uint32_t ref, uint32_t mask);
@@ -723,14 +762,12 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
                              unsigned processor);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-                             uint64_t offset, uint64_t size, unsigned value,
-                             enum r600_coherency coher);
+                             uint64_t offset, uint64_t size, unsigned value);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
                                                  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                         struct r600_resource *dst, struct r600_resource *src);
-void r600_dma_emit_wait_idle(struct r600_common_context *rctx);
 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
                    struct radeon_saved_cs *saved);
 void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
@@ -738,8 +775,9 @@ bool r600_check_device_reset(struct r600_common_context *rctx);
 
 /* r600_gpu_load.c */
 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
-uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
-unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
+uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
+unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
+                         uint64_t begin);
 
 /* r600_perfcounters.c */
 void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
@@ -749,7 +787,7 @@ void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_queries(struct r600_common_context *ctx);
 void r600_resume_queries(struct r600_common_context *ctx);
-void r600_query_init_backend_mask(struct r600_common_context *ctx);
+void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
 
 /* r600_streamout.c */
 void r600_streamout_buffers_dirty(struct r600_common_context *rctx);