gallium/u_blitter: use draw_rectangle for all blits except cubemaps
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.h
index 70b28365da42b8c64828b86ed239be1c8e360d66..08220bdfd8d7e1f41d234bb4354f655644fe6377 100644 (file)
@@ -45,6 +45,9 @@
 #include "util/slab.h"
 #include "util/u_suballoc.h"
 #include "util/u_transfer.h"
+#include "util/u_threaded_context.h"
+
+struct u_log_context;
 
 #define ATI_VENDOR_ID 0x1002
 
 /* Pipeline & streamout query controls. */
 #define R600_CONTEXT_START_PIPELINE_STATS      (1u << 1)
 #define R600_CONTEXT_STOP_PIPELINE_STATS       (1u << 2)
-#define R600_CONTEXT_PRIVATE_FLAG              (1u << 3)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND     (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG              (1u << 4)
 
 /* special primitive types */
 #define R600_PRIM_RECTANGLE_LIST       PIPE_PRIM_MAX
 
+#define R600_NOT_QUERY         0xffffffff
+
 /* Debug flags. */
-/* logging */
-#define DBG_TEX                        (1 << 0)
-/* gap - reuse */
-#define DBG_COMPUTE            (1 << 2)
-#define DBG_VM                 (1 << 3)
-/* gap - reuse */
-/* shader logging */
-#define DBG_FS                 (1 << 5)
-#define DBG_VS                 (1 << 6)
-#define DBG_GS                 (1 << 7)
-#define DBG_PS                 (1 << 8)
-#define DBG_CS                 (1 << 9)
-#define DBG_TCS                        (1 << 10)
-#define DBG_TES                        (1 << 11)
+#define DBG_VS                 (1 << PIPE_SHADER_VERTEX)
+#define DBG_PS                 (1 << PIPE_SHADER_FRAGMENT)
+#define DBG_GS                 (1 << PIPE_SHADER_GEOMETRY)
+#define DBG_TCS                        (1 << PIPE_SHADER_TESS_CTRL)
+#define DBG_TES                        (1 << PIPE_SHADER_TESS_EVAL)
+#define DBG_CS                 (1 << PIPE_SHADER_COMPUTE)
+#define DBG_ALL_SHADERS                (DBG_FS - 1) /* every bit below DBG_FS, i.e. bits 0-5 */
+#define DBG_FS                 (1 << 6) /* fetch shader */
+#define DBG_TEX                        (1 << 7)
+#define DBG_NIR                        (1 << 8)
+#define DBG_COMPUTE            (1 << 9)
+/* gap */
+#define DBG_VM                 (1 << 11)
 #define DBG_NO_IR              (1 << 12)
 #define DBG_NO_TGSI            (1 << 13)
 #define DBG_NO_ASM             (1 << 14)
 #define DBG_PREOPT_IR          (1 << 15)
 #define DBG_CHECK_IR           (1 << 16)
 #define DBG_NO_OPT_VARIANT     (1 << 17)
+#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
 /* gaps */
 #define DBG_TEST_DMA           (1 << 20)
 /* Bits 21-31 are reserved for the r600g driver. */
 /* features */
-#define DBG_NO_ASYNC_DMA       (1llu << 32)
-#define DBG_NO_HYPERZ          (1llu << 33)
-#define DBG_NO_DISCARD_RANGE   (1llu << 34)
-#define DBG_NO_2D_TILING       (1llu << 35)
-#define DBG_NO_TILING          (1llu << 36)
-#define DBG_SWITCH_ON_EOP      (1llu << 37)
-#define DBG_FORCE_DMA          (1llu << 38)
-#define DBG_PRECOMPILE         (1llu << 39)
-#define DBG_INFO               (1llu << 40)
-#define DBG_NO_WC              (1llu << 41)
-#define DBG_CHECK_VM           (1llu << 42)
-#define DBG_NO_DCC             (1llu << 43)
-#define DBG_NO_DCC_CLEAR       (1llu << 44)
-#define DBG_NO_RB_PLUS         (1llu << 45)
-#define DBG_SI_SCHED           (1llu << 46)
-#define DBG_MONOLITHIC_SHADERS (1llu << 47)
-#define DBG_NO_CE              (1llu << 48)
-#define DBG_UNSAFE_MATH                (1llu << 49)
-#define DBG_NO_DCC_FB          (1llu << 50)
-#define DBG_TEST_VMFAULT_CP    (1llu << 51)
-#define DBG_TEST_VMFAULT_SDMA  (1llu << 52)
-#define DBG_TEST_VMFAULT_SHADER        (1llu << 53)
+#define DBG_NO_ASYNC_DMA       (1ull << 32)
+#define DBG_NO_HYPERZ          (1ull << 33)
+#define DBG_NO_DISCARD_RANGE   (1ull << 34)
+#define DBG_NO_2D_TILING       (1ull << 35)
+#define DBG_NO_TILING          (1ull << 36)
+#define DBG_SWITCH_ON_EOP      (1ull << 37)
+#define DBG_FORCE_DMA          (1ull << 38)
+#define DBG_PRECOMPILE         (1ull << 39)
+#define DBG_INFO               (1ull << 40)
+#define DBG_NO_WC              (1ull << 41)
+#define DBG_CHECK_VM           (1ull << 42)
+#define DBG_NO_DCC             (1ull << 43)
+#define DBG_NO_DCC_CLEAR       (1ull << 44)
+#define DBG_NO_RB_PLUS         (1ull << 45)
+#define DBG_SI_SCHED           (1ull << 46)
+#define DBG_MONOLITHIC_SHADERS (1ull << 47)
+/* gap */
+#define DBG_UNSAFE_MATH                (1ull << 49)
+#define DBG_NO_DCC_FB          (1ull << 50)
+#define DBG_TEST_VMFAULT_CP    (1ull << 51)
+#define DBG_TEST_VMFAULT_SDMA  (1ull << 52)
+#define DBG_TEST_VMFAULT_SHADER        (1ull << 53)
+#define DBG_NO_DPBB            (1ull << 54)
+#define DBG_NO_DFSM            (1ull << 55)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 #define R600_MAX_VIEWPORTS        16
@@ -140,7 +148,7 @@ void radeon_shader_binary_clean(struct ac_shader_binary *b);
  * at the moment.
  */
 struct r600_resource {
-       struct u_resource               b;
+       struct threaded_resource        b;
 
        /* Winsys objects. */
        struct pb_buffer                *buf;
@@ -179,12 +187,15 @@ struct r600_resource {
        bool                            TC_L2_dirty;
 
        /* Whether the resource has been exported via resource_get_handle. */
-       bool                            is_shared;
        unsigned                        external_usage; /* PIPE_HANDLE_USAGE_* */
+
+       /* Whether this resource is referenced by bindless handles. */
+       bool                            texture_handle_allocated;
+       bool                            image_handle_allocated;
 };
 
 struct r600_transfer {
-       struct pipe_transfer            transfer;
+       struct threaded_transfer        b;
        struct r600_resource            *staging;
        unsigned                        offset;
 };
@@ -197,6 +208,7 @@ struct r600_fmask_info {
        unsigned bank_height;
        unsigned slice_tile_max;
        unsigned tile_mode_index;
+       unsigned tile_swizzle;
 };
 
 struct r600_cmask_info {
@@ -232,7 +244,7 @@ struct r600_texture {
        unsigned                        last_msaa_resolve_target_micro_mode;
 
        /* Depth buffer compression and fast clear. */
-       struct r600_resource            *htile_buffer;
+       uint64_t                        htile_offset;
        bool                            tc_compatible_htile;
        bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
@@ -365,13 +377,19 @@ union r600_mmio_counters {
                struct r600_mmio_counter meq;
                struct r600_mmio_counter me;
                struct r600_mmio_counter surf_sync;
-               struct r600_mmio_counter dma;
+               struct r600_mmio_counter cp_dma;
                struct r600_mmio_counter scratch_ram;
-               struct r600_mmio_counter ce;
        } named;
        unsigned array[0];
 };
 
+struct r600_memory_object {
+       struct pipe_memory_object       b;
+       struct pb_buffer                *buf;
+       uint32_t                        stride;
+       uint32_t                        offset;
+};
+
 struct r600_common_screen {
        struct pipe_screen              b;
        struct radeon_winsys            *ws;
@@ -438,6 +456,11 @@ struct r600_common_screen {
                 */
                unsigned cp_to_L2;
 
+               /* Context flags to set so that all writes from earlier jobs
+                * that end in L2 are seen by CP.
+                */
+               unsigned L2_to_cp;
+
                /* Context flags to set so that all writes from earlier
                 * compute jobs are seen by L2 clients.
                 */
@@ -486,7 +509,7 @@ struct r600_streamout {
 
        /* External state which comes from the vertex shader,
         * it must be set explicitly when binding a shader. */
-       unsigned                        *stride_in_dw;
+       uint16_t                        *stride_in_dw;
        unsigned                        enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
 
        /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
@@ -547,14 +570,18 @@ struct r600_common_context {
        struct r600_ring                dma;
        struct pipe_fence_handle        *last_gfx_fence;
        struct pipe_fence_handle        *last_sdma_fence;
+       struct r600_resource            *eop_bug_scratch;
        unsigned                        num_gfx_cs_flushes;
        unsigned                        initial_gfx_cs_size;
        unsigned                        gpu_reset_counter;
        unsigned                        last_dirty_tex_counter;
        unsigned                        last_compressed_colortex_counter;
+       unsigned                        last_num_draw_calls;
 
+       struct threaded_context         *tc;
        struct u_suballocator           *allocator_zeroed_memory;
        struct slab_child_pool          pool_transfers;
+       struct slab_child_pool          pool_transfers_unsync; /* for threaded_context */
 
        /* Current unaccounted memory usage. */
        uint64_t                        vram;
@@ -580,6 +607,9 @@ struct r600_common_context {
        unsigned                        num_cs_dw_queries_suspend;
        /* Misc stats. */
        unsigned                        num_draw_calls;
+       unsigned                        num_decompress_calls;
+       unsigned                        num_mrt_draw_calls;
+       unsigned                        num_prim_restart_calls;
        unsigned                        num_spill_draw_calls;
        unsigned                        num_compute_calls;
        unsigned                        num_spill_compute_calls;
@@ -588,9 +618,11 @@ struct r600_common_context {
        unsigned                        num_vs_flushes;
        unsigned                        num_ps_flushes;
        unsigned                        num_cs_flushes;
-       unsigned                        num_fb_cache_flushes;
+       unsigned                        num_cb_cache_flushes;
+       unsigned                        num_db_cache_flushes;
        unsigned                        num_L2_invalidates;
        unsigned                        num_L2_writebacks;
+       unsigned                        num_resident_handles;
        uint64_t                        num_alloc_tex_transfer_bytes;
        unsigned                        last_tex_ps_draw_ratio; /* for query */
 
@@ -631,6 +663,7 @@ struct r600_common_context {
 
        struct pipe_debug_callback      debug;
        struct pipe_device_reset_callback device_reset_callback;
+       struct u_log_context            *log;
 
        void                            *query_result_shader;
 
@@ -664,6 +697,12 @@ struct r600_common_context {
         * the buffer is bound, including all resource descriptors. */
        void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
 
+       /* Update all resource bindings where the buffer is bound, including
+        * all resource descriptors. This is invalidate_buffer without
+        * the invalidation. */
+       void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
+                             uint64_t old_gpu_address);
+
        /* Enable or disable occlusion queries. */
        void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);
 
@@ -681,7 +720,7 @@ struct r600_common_context {
                                enum ring_type ring);
 };
 
-/* r600_buffer.c */
+/* r600_buffer_common.c */
 bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
                                     struct pb_buffer *buf,
                                     enum radeon_bo_usage usage);
@@ -712,20 +751,24 @@ r600_buffer_from_user_memory(struct pipe_screen *screen,
 void
 r600_invalidate_resource(struct pipe_context *ctx,
                         struct pipe_resource *resource);
+void r600_replace_buffer_storage(struct pipe_context *ctx,
+                                struct pipe_resource *dst,
+                                struct pipe_resource *src);
 
 /* r600_common_pipe.c */
 void r600_gfx_write_event_eop(struct r600_common_context *ctx,
                              unsigned event, unsigned event_flags,
                              unsigned data_sel,
                              struct r600_resource *buf, uint64_t va,
-                             uint32_t old_fence, uint32_t new_fence);
+                             uint32_t new_fence, unsigned query_type);
 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
 void r600_gfx_wait_fence(struct r600_common_context *ctx,
                         uint64_t va, uint32_t ref, uint32_t mask);
 void r600_draw_rectangle(struct blitter_context *blitter,
-                        int x1, int y1, int x2, int y2, float depth,
+                        int x1, int y1, int x2, int y2,
+                        float depth, unsigned num_instances,
                         enum blitter_attrib_type type,
-                        const union pipe_color_union *attrib);
+                        const union blitter_attrib *attrib);
 bool r600_common_screen_init(struct r600_common_screen *rscreen,
                             struct radeon_winsys *ws);
 void r600_destroy_common_screen(struct r600_common_screen *rscreen);
@@ -747,7 +790,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family);
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
                         struct r600_resource *dst, struct r600_resource *src);
 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
-                   struct radeon_saved_cs *saved);
+                   struct radeon_saved_cs *saved, bool get_buffer_list);
 void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
 bool r600_check_device_reset(struct r600_common_context *rctx);
 
@@ -800,7 +843,7 @@ bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     struct r600_texture **staging);
 void r600_print_texture_info(struct r600_common_screen *rscreen,
-                            struct r600_texture *rtex, FILE *f);
+                            struct r600_texture *rtex, struct u_log_context *log);
 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
                                        const struct pipe_resource *templ);
 bool vi_dcc_formats_compatible(enum pipe_format format1,
@@ -830,7 +873,7 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
-                                  unsigned *buffers, unsigned *dirty_cbufs,
+                                  unsigned *buffers, ubyte *dirty_cbufs,
                                   const union pipe_color_union *color);
 bool r600_texture_disable_dcc(struct r600_common_context *rctx,
                              struct r600_texture *rtex);
@@ -972,6 +1015,19 @@ vi_dcc_enabled(struct r600_texture *tex, unsigned level)
        return tex->dcc_offset && level < tex->surface.num_dcc_levels;
 }
 
+static inline bool
+r600_htile_enabled(struct r600_texture *tex, unsigned level)
+{
+       return tex->htile_offset && level == 0; /* true only when htile_offset is non-zero and level is 0 */
+}
+
+static inline bool
+vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
+{
+       assert(!tex->tc_compatible_htile || tex->htile_offset); /* tc_compatible_htile implies a non-zero htile_offset */
+       return tex->tc_compatible_htile && level == 0; /* true only for level 0 of a texture flagged tc_compatible_htile */
+}
+
 #define COMPUTE_DBG(rscreen, fmt, args...) \
        do { \
                if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
@@ -987,4 +1043,9 @@ vi_dcc_enabled(struct r600_texture *tex, unsigned level)
        (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |        \
         (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
 
+static inline int S_FIXED(float value, unsigned frac_bits)
+{
+       return value * (1 << frac_bits); /* scales by 2^frac_bits; the implicit float->int conversion drops the fraction (truncates toward zero); the shift is done on int, so frac_bits must stay below the width of int */
+}
+
 #endif