gallium/radeon: don't use PREDICATION_OP_CLEAR
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.h
index a87efdcbdae98dc6a66d1cede80f04b580d4fa18..139c377cd6e58862becc95cff83b4209fa8e4ce5 100644 (file)
 
 #include <stdio.h>
 
-#include "../../winsys/radeon/drm/radeon_winsys.h"
+#include "radeon/radeon_winsys.h"
 
-#include "util/u_double_list.h"
+#include "util/u_blitter.h"
+#include "util/list.h"
 #include "util/u_range.h"
 #include "util/u_slab.h"
 #include "util/u_suballoc.h"
 #define R600_QUERY_NUM_BYTES_MOVED     (PIPE_QUERY_DRIVER_SPECIFIC + 5)
 #define R600_QUERY_VRAM_USAGE          (PIPE_QUERY_DRIVER_SPECIFIC + 6)
 #define R600_QUERY_GTT_USAGE           (PIPE_QUERY_DRIVER_SPECIFIC + 7)
+#define R600_QUERY_GPU_TEMPERATURE     (PIPE_QUERY_DRIVER_SPECIFIC + 8)
+#define R600_QUERY_CURRENT_GPU_SCLK    (PIPE_QUERY_DRIVER_SPECIFIC + 9)
+#define R600_QUERY_CURRENT_GPU_MCLK    (PIPE_QUERY_DRIVER_SPECIFIC + 10)
+#define R600_QUERY_GPU_LOAD            (PIPE_QUERY_DRIVER_SPECIFIC + 11)
+#define R600_QUERY_NUM_COMPILATIONS    (PIPE_QUERY_DRIVER_SPECIFIC + 12)
+#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
 
-/* read caches */
-#define R600_CONTEXT_INV_VERTEX_CACHE          (1 << 0)
-#define R600_CONTEXT_INV_TEX_CACHE             (1 << 1)
-#define R600_CONTEXT_INV_CONST_CACHE           (1 << 2)
-#define R600_CONTEXT_INV_SHADER_CACHE          (1 << 3)
-/* read-write caches */
-#define R600_CONTEXT_STREAMOUT_FLUSH           (1 << 8)
-#define R600_CONTEXT_FLUSH_AND_INV             (1 << 9)
-#define R600_CONTEXT_FLUSH_AND_INV_CB_META     (1 << 10)
-#define R600_CONTEXT_FLUSH_AND_INV_DB_META     (1 << 11)
-#define R600_CONTEXT_FLUSH_AND_INV_DB          (1 << 12)
-#define R600_CONTEXT_FLUSH_AND_INV_CB          (1 << 13)
-/* engine synchronization */
-#define R600_CONTEXT_PS_PARTIAL_FLUSH          (1 << 16)
-#define R600_CONTEXT_WAIT_3D_IDLE              (1 << 17)
-#define R600_CONTEXT_WAIT_CP_DMA_IDLE          (1 << 18)
-#define R600_CONTEXT_VGT_FLUSH                 (1 << 19)
+#define R600_CONTEXT_STREAMOUT_FLUSH           (1u << 0)
+#define R600_CONTEXT_PRIVATE_FLAG              (1u << 1)
+
+/* special primitive types */
+#define R600_PRIM_RECTANGLE_LIST       PIPE_PRIM_MAX
 
 /* Debug flags. */
 /* logging */
 #define DBG_COMPUTE            (1 << 2)
 #define DBG_VM                 (1 << 3)
 #define DBG_TRACE_CS           (1 << 4)
+/* shader logging */
+#define DBG_FS                 (1 << 5)
+#define DBG_VS                 (1 << 6)
+#define DBG_GS                 (1 << 7)
+#define DBG_PS                 (1 << 8)
+#define DBG_CS                 (1 << 9)
+#define DBG_TCS                        (1 << 10)
+#define DBG_TES                        (1 << 11)
+#define DBG_NO_IR              (1 << 12)
+#define DBG_NO_TGSI            (1 << 13)
+#define DBG_NO_ASM             (1 << 14)
+/* Bits 21-31 are reserved for the r600g driver. */
 /* features */
-#define DBG_NO_ASYNC_DMA       (1 << 5)
-/* shaders */
-#define DBG_FS                 (1 << 8)
-#define DBG_VS                 (1 << 9)
-#define DBG_GS                 (1 << 10)
-#define DBG_PS                 (1 << 11)
-#define DBG_CS                 (1 << 12)
-/* features */
-#define DBG_HYPERZ             (1 << 13)
-#define DBG_NO_DISCARD_RANGE   (1 << 14)
-/* The maximum allowed bit is 15. */
+#define DBG_NO_ASYNC_DMA       (1llu << 32)
+#define DBG_NO_HYPERZ          (1llu << 33)
+#define DBG_NO_DISCARD_RANGE   (1llu << 34)
+#define DBG_NO_2D_TILING       (1llu << 35)
+#define DBG_NO_TILING          (1llu << 36)
+#define DBG_SWITCH_ON_EOP      (1llu << 37)
+#define DBG_FORCE_DMA          (1llu << 38)
+#define DBG_PRECOMPILE         (1llu << 39)
+#define DBG_INFO               (1llu << 40)
+#define DBG_NO_WC              (1llu << 41)
+#define DBG_CHECK_VM           (1llu << 42)
+#define DBG_NO_DCC             (1llu << 43)
+#define DBG_NO_DCC_CLEAR       (1llu << 44)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
 struct r600_common_context;
 
+/* One entry of a shader binary's relocation table: a name paired with a
+ * 64-bit offset.
+ * NOTE(review): presumably `name` is the symbol to resolve and `offset` is
+ * the position inside the shader code to patch; ownership of the `name`
+ * string is not visible here - confirm against the code that fills it in. */
+struct radeon_shader_reloc {
+       char            *name;
+       uint64_t        offset;
+};
+
 struct radeon_shader_binary {
        /** Shader code */
        unsigned char *code;
@@ -108,9 +122,24 @@ struct radeon_shader_binary {
        unsigned char *config;
        unsigned config_size;
 
-       /** Set to 1 if the disassembly for this binary has been dumped to
-        *  stderr. */
-       int disassembled;
+       /** The number of bytes of config information for each global symbol.
+        */
+       unsigned config_size_per_symbol;
+
+       /** Constant data accessed by the shader.  This will be uploaded
+        * into a constant buffer. */
+       unsigned char *rodata;
+       unsigned rodata_size;
+
+       /** List of symbol offsets for the shader */
+       uint64_t *global_symbol_offsets;
+       unsigned global_symbol_count;
+
+       struct radeon_shader_reloc *relocs;
+       unsigned reloc_count;
+
+       /** Disassembled shader in a string. */
+       char *disasm_string;
 };
 
 struct r600_resource {
@@ -119,6 +148,7 @@ struct r600_resource {
        /* Winsys objects. */
        struct pb_buffer                *buf;
        struct radeon_winsys_cs_handle  *cs_buf;
+       uint64_t                        gpu_address;
 
        /* Resource state. */
        enum radeon_bo_domain           domains;
@@ -132,6 +162,18 @@ struct r600_resource {
         * the unsynchronized map flag and expect the driver to figure it out.
          */
        struct util_range               valid_buffer_range;
+
+       /* For buffers only. This indicates that a write operation has been
+        * performed by TC L2, but the cache hasn't been flushed.
+        * Any hw block which doesn't use or bypasses TC L2 should check this
+        * flag and flush the cache before using the buffer.
+        *
+        * For example, TC L2 must be flushed if a buffer which has been
+        * modified by a shader store instruction is about to be used as
+        * an index buffer. The reason is that VGT DMA index fetching doesn't
+        * use TC L2.
+        */
+       bool                            TC_L2_dirty;
 };
 
 struct r600_transfer {
@@ -165,23 +207,25 @@ struct r600_texture {
        unsigned                        pitch_override;
        bool                            is_depth;
        unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
+       unsigned                        stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
        struct r600_texture             *flushed_depth_texture;
        boolean                         is_flushing_texture;
-       struct radeon_surface           surface;
+       struct radeon_surf              surface;
 
        /* Colorbuffer compression and fast clear. */
        struct r600_fmask_info          fmask;
        struct r600_cmask_info          cmask;
        struct r600_resource            *cmask_buffer;
+       struct r600_resource            *dcc_buffer;
        unsigned                        cb_color_info; /* fast clear enable bit */
        unsigned                        color_clear_value[2];
 
        /* Depth buffer compression and fast clear. */
        struct r600_resource            *htile_buffer;
+       bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
 
        bool                            non_disp_tiling; /* R600-Cayman only */
-       unsigned                        mipmap_shift;
 };
 
 struct r600_surface {
@@ -202,7 +246,9 @@ struct r600_surface {
        unsigned cb_color_dim;          /* EG only */
        unsigned cb_color_pitch;        /* EG and later */
        unsigned cb_color_slice;        /* EG and later */
+       unsigned cb_dcc_base;           /* VI and later */
        unsigned cb_color_attrib;       /* EG and later */
+       unsigned cb_dcc_control;        /* VI and later */
        unsigned cb_color_fmask;        /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
        unsigned cb_color_fmask_slice;  /* EG and later */
        unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
@@ -239,7 +285,7 @@ struct r600_common_screen {
        enum chip_class                 chip_class;
        struct radeon_info              info;
        struct r600_tiling_info         tiling_info;
-       unsigned                        debug_flags;
+       uint64_t                        debug_flags;
        bool                            has_cp_dma;
        bool                            has_streamout;
 
@@ -251,6 +297,24 @@ struct r600_common_screen {
        struct r600_resource            *trace_bo;
        uint32_t                        *trace_ptr;
        unsigned                        cs_count;
+
+       /* This must be in the screen, because UE4 uses one context for
+        * compilation and another one for rendering.
+        */
+       unsigned                        num_compilations;
+       /* Along with ST_DEBUG=precompile, this should show if applications
+        * are loading shaders on demand. This is a monotonic counter.
+        */
+       unsigned                        num_shaders_created;
+
+       /* GPU load thread. */
+       pipe_mutex                      gpu_load_mutex;
+       pipe_thread                     gpu_load_thread;
+       unsigned                        gpu_load_counter_busy;
+       unsigned                        gpu_load_counter_idle;
+       volatile unsigned               gpu_load_stop_thread; /* bool */
+
+       char                            renderer_string[64];
 };
 
 /* This encapsulates a state or an operation which can emitted into the GPU
@@ -258,7 +322,7 @@ struct r600_common_screen {
 struct r600_atom {
        void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
        unsigned                num_dw;
-       bool                    dirty;
+       unsigned short          id;
 };
 
 struct r600_so_target {
@@ -267,6 +331,7 @@ struct r600_so_target {
        /* The buffer where BUFFER_FILLED_SIZE is stored. */
        struct r600_resource    *buf_filled_size;
        unsigned                buf_filled_size_offset;
+       bool                    buf_filled_size_valid;
 
        unsigned                stride_in_dw;
 };
@@ -286,6 +351,10 @@ struct r600_streamout {
        /* External state which comes from the vertex shader,
         * it must be set explicitly when binding a shader. */
        unsigned                        *stride_in_dw;
+       unsigned                        enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+
+       /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
+       unsigned                        hw_enabled_mask;
 
        /* The state of VGT_STRMOUT_(CONFIG|EN). */
        struct r600_atom                enable_atom;
@@ -296,25 +365,23 @@ struct r600_streamout {
 
 struct r600_ring {
        struct radeon_winsys_cs         *cs;
-       bool                            flushing;
        void (*flush)(void *ctx, unsigned flags,
                      struct pipe_fence_handle **fence);
 };
 
-struct r600_rings {
-       struct r600_ring                gfx;
-       struct r600_ring                dma;
-};
-
 struct r600_common_context {
        struct pipe_context b; /* base class */
 
        struct r600_common_screen       *screen;
        struct radeon_winsys            *ws;
+       struct radeon_winsys_ctx        *ctx;
        enum radeon_family              family;
        enum chip_class                 chip_class;
-       struct r600_rings               rings;
+       struct r600_ring                gfx;
+       struct r600_ring                dma;
+       struct pipe_fence_handle        *last_sdma_fence;
        unsigned                        initial_gfx_cs_size;
+       unsigned                        gpu_reset_counter;
 
        struct u_upload_mgr             *uploader;
        struct u_suballocator           *allocator_so_filled_size;
@@ -333,14 +400,16 @@ struct r600_common_context {
        /* Queries. */
        /* The list of active queries. Only one query of each type can be active. */
        int                             num_occlusion_queries;
-       int                             num_pipelinestat_queries;
        /* Keep track of non-timer queries, because they should be suspended
         * during context flushing.
-        * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
+        * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits,
+        * but they should be suspended between IBs. */
        struct list_head                active_nontimer_queries;
+       struct list_head                active_timer_queries;
        unsigned                        num_cs_dw_nontimer_queries_suspend;
+       unsigned                        num_cs_dw_timer_queries_suspend;
        /* If queries have been suspended. */
-       bool                            nontimer_queries_suspended;
+       bool                            queries_suspended_for_flush;
        /* Additional hardware info. */
        unsigned                        backend_mask;
        unsigned                        max_db; /* for OQ */
@@ -351,12 +420,27 @@ struct r600_common_context {
        struct pipe_query               *current_render_cond;
        unsigned                        current_render_cond_mode;
        boolean                         current_render_cond_cond;
-       boolean                         predicate_drawing;
+       bool                            predicate_drawing;
+       bool                            render_cond_force_off; /* for u_blitter */
        /* For context flushing. */
        struct pipe_query               *saved_render_cond;
        boolean                         saved_render_cond_cond;
        unsigned                        saved_render_cond_mode;
 
+       /* MSAA sample locations.
+        * The first index is the sample index.
+        * The second index is the coordinate: X, Y. */
+       float                           sample_locations_1x[1][2];
+       float                           sample_locations_2x[2][2];
+       float                           sample_locations_4x[4][2];
+       float                           sample_locations_8x[8][2];
+       float                           sample_locations_16x[16][2];
+
+       /* The list of all texture buffer objects in this context.
+        * This list is walked when a buffer is invalidated/reallocated and
+        * the GPU addresses are updated. */
+       struct list_head                texture_buffers;
+
        /* Copy one resource to another using async DMA. */
        void (*dma_copy)(struct pipe_context *ctx,
                         struct pipe_resource *dst,
@@ -367,7 +451,8 @@ struct r600_common_context {
                         const struct pipe_box *src_box);
 
        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
-                            unsigned offset, unsigned size, unsigned value);
+                            unsigned offset, unsigned size, unsigned value,
+                            bool is_framebuffer);
 
        void (*blit_decompress_depth)(struct pipe_context *ctx,
                                      struct r600_texture *texture,
@@ -386,6 +471,9 @@ struct r600_common_context {
        /* This ensures there is enough space in the command stream. */
        void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
                                  bool include_draw_vbo);
+
+       void (*set_atom_dirty)(struct r600_common_context *ctx,
+                              struct r600_atom *atom, bool dirty);
 };
 
 /* r600_buffer.c */
@@ -402,11 +490,26 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
                                         const struct pipe_resource *templ,
                                         unsigned alignment);
+struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
+                                                 unsigned bind,
+                                                 unsigned usage,
+                                                 unsigned size,
+                                                 unsigned alignment);
+struct pipe_resource *
+r600_buffer_from_user_memory(struct pipe_screen *screen,
+                            const struct pipe_resource *templ,
+                            void *user_memory);
 
 /* r600_common_pipe.c */
+void r600_draw_rectangle(struct blitter_context *blitter,
+                        int x1, int y1, int x2, int y2, float depth,
+                        enum blitter_attrib_type type,
+                        const union pipe_color_union *attrib);
 bool r600_common_screen_init(struct r600_common_screen *rscreen,
                             struct radeon_winsys *ws);
 void r600_destroy_common_screen(struct r600_common_screen *rscreen);
+void r600_preflush_suspend_features(struct r600_common_context *ctx);
+void r600_postflush_resume_features(struct r600_common_context *ctx);
 bool r600_common_context_init(struct r600_common_context *rctx,
                              struct r600_common_screen *rscreen);
 void r600_common_context_cleanup(struct r600_common_context *rctx);
@@ -414,16 +517,24 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
                          const struct tgsi_token *tokens);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-                             unsigned offset, unsigned size, unsigned value);
+                             unsigned offset, unsigned size, unsigned value,
+                             bool is_framebuffer);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
                                                  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw);
 
+/* r600_gpu_load.c */
+void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
+uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
+unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
+
 /* r600_query.c */
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
 void r600_resume_nontimer_queries(struct r600_common_context *ctx);
+void r600_suspend_timer_queries(struct r600_common_context *ctx);
+void r600_resume_timer_queries(struct r600_common_context *ctx);
 void r600_query_init_backend_mask(struct r600_common_context *ctx);
 
 /* r600_streamout.c */
@@ -458,7 +569,7 @@ unsigned r600_translate_colorswap(enum pipe_format format);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
-                                  unsigned *buffers,
+                                  unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color);
 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
 void r600_init_context_texture_functions(struct r600_common_context *rctx);
@@ -470,23 +581,72 @@ extern const uint32_t eg_sample_locs_4x[4];
 extern const unsigned eg_max_dist_4x;
 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
                                unsigned sample_index, float *out_value);
-void cayman_emit_msaa_state(struct radeon_winsys_cs *cs, int nr_samples);
+void cayman_init_msaa(struct pipe_context *ctx);
+void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
+void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
+                            int ps_iter_samples, int overrast_samples);
 
 
 /* Inline helpers. */
 
-static INLINE struct r600_resource *r600_resource(struct pipe_resource *r)
+static inline struct r600_resource *r600_resource(struct pipe_resource *r)
 {
        return (struct r600_resource*)r;
 }
 
-static INLINE void
+static inline void
 r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
 {
        pipe_resource_reference((struct pipe_resource **)ptr,
                                (struct pipe_resource *)res);
 }
 
+/* Bucket `filter` into a level in the range 0..4:
+ *   filter <= 1 -> 0,  <= 2 -> 1,  <= 4 -> 2,  <= 8 -> 3,  larger -> 4.
+ * NOTE(review): `filter` is presumably the sampler's maximum anisotropy
+ * ratio - confirm against the callers. */
+static inline unsigned r600_tex_aniso_filter(unsigned filter)
+{
+       unsigned level = 0;
+       unsigned limit = 1;
+
+       /* Every level covers twice the range of the previous one; the
+        * last level (4) is open-ended. */
+       while (level < 4 && filter > limit) {
+               limit <<= 1;
+               level++;
+       }
+
+       return level;
+}
+
+/* Look up the wavefront size for a chip family.
+ *
+ * Returns 16 for RV610, RS780, RV620 and RS880; 32 for RV630, RV635,
+ * RV730, RV710, PALM and CEDAR; 64 for every other family. */
+static inline unsigned r600_wavefront_size(enum radeon_family family)
+{
+       unsigned size = 64; /* any family not singled out below */
+
+       if (family == CHIP_RV610 || family == CHIP_RS780 ||
+           family == CHIP_RV620 || family == CHIP_RS880)
+               size = 16;
+       else if (family == CHIP_RV630 || family == CHIP_RV635 ||
+                family == CHIP_RV730 || family == CHIP_RV710 ||
+                family == CHIP_PALM || family == CHIP_CEDAR)
+               size = 32;
+
+       return size;
+}
+
+/* Choose the buffer-object priority for a resource bound as a sampler
+ * view: buffers get RADEON_PRIO_SAMPLER_BUFFER; anything else gets the
+ * MSAA texture priority when it has more than one sample and the plain
+ * texture priority otherwise. */
+static inline enum radeon_bo_priority
+r600_get_sampler_view_priority(struct r600_resource *res)
+{
+       if (res->b.b.target != PIPE_BUFFER) {
+               return res->b.b.nr_samples > 1 ?
+                       RADEON_PRIO_SAMPLER_TEXTURE_MSAA :
+                       RADEON_PRIO_SAMPLER_TEXTURE;
+       }
+
+       return RADEON_PRIO_SAMPLER_BUFFER;
+}
+
+/* Print a printf-style message to stderr when DBG_COMPUTE is set in
+ * (rscreen)->b.debug_flags; otherwise do nothing.
+ *
+ * The expansion is a single do/while(0) statement WITHOUT a trailing
+ * semicolon: the caller writes `COMPUTE_DBG(...);`, which keeps the macro
+ * usable as the body of an unbraced if/else.  `rscreen` is parenthesized
+ * so that any pointer expression can be passed. */
+#define COMPUTE_DBG(rscreen, fmt, args...) \
+       do { \
+               if (((rscreen)->b.debug_flags & DBG_COMPUTE)) \
+                       fprintf(stderr, fmt, ##args); \
+       } while (0)
+
+
 #define R600_ERR(fmt, args...) \
        fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)