ac/surface: compute tile swizzle for GFX9
[mesa.git] / src / gallium / drivers / radeon / r600_pipe_common.h
index fb52dfb529772c58dcca30b29e9e3a751a9c13bf..79419036665374fd521a3b268b80ad5ef305e2c6 100644 (file)
@@ -19,9 +19,6 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
- *
- * Authors: Marek Olšák <maraeo@gmail.com>
- *
  */
 
 /**
 #include "util/u_transfer.h"
 #include "util/u_threaded_context.h"
 
-#define ATI_VENDOR_ID 0x1002
+struct u_log_context;
+struct si_screen;
+struct si_context;
 
 #define R600_RESOURCE_FLAG_TRANSFER            (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH       (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
 #define R600_RESOURCE_FLAG_FORCE_TILING                (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define R600_RESOURCE_FLAG_DISABLE_DCC         (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
 #define R600_RESOURCE_FLAG_UNMAPPABLE          (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
-
-#define R600_CONTEXT_STREAMOUT_FLUSH           (1u << 0)
-/* Pipeline & streamout query controls. */
-#define R600_CONTEXT_START_PIPELINE_STATS      (1u << 1)
-#define R600_CONTEXT_STOP_PIPELINE_STATS       (1u << 2)
-#define R600_CONTEXT_PRIVATE_FLAG              (1u << 3)
-
-/* special primitive types */
-#define R600_PRIM_RECTANGLE_LIST       PIPE_PRIM_MAX
+#define R600_RESOURCE_FLAG_READ_ONLY           (PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
+#define R600_RESOURCE_FLAG_32BIT               (PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
 
 /* Debug flags. */
-/* logging */
-#define DBG_TEX                        (1 << 0)
-#define DBG_NIR                        (1 << 1)
-#define DBG_COMPUTE            (1 << 2)
-#define DBG_VM                 (1 << 3)
-/* gap - reuse */
-/* shader logging */
-#define DBG_FS                 (1 << 5)
-#define DBG_VS                 (1 << 6)
-#define DBG_GS                 (1 << 7)
-#define DBG_PS                 (1 << 8)
-#define DBG_CS                 (1 << 9)
-#define DBG_TCS                        (1 << 10)
-#define DBG_TES                        (1 << 11)
-#define DBG_NO_IR              (1 << 12)
-#define DBG_NO_TGSI            (1 << 13)
-#define DBG_NO_ASM             (1 << 14)
-#define DBG_PREOPT_IR          (1 << 15)
-#define DBG_CHECK_IR           (1 << 16)
-#define DBG_NO_OPT_VARIANT     (1 << 17)
-#define DBG_FS_CORRECT_DERIVS_AFTER_KILL (1 << 18)
-/* gaps */
-#define DBG_TEST_DMA           (1 << 20)
-/* Bits 21-31 are reserved for the r600g driver. */
-/* features */
-#define DBG_NO_ASYNC_DMA       (1ull << 32)
-#define DBG_NO_HYPERZ          (1ull << 33)
-#define DBG_NO_DISCARD_RANGE   (1ull << 34)
-#define DBG_NO_2D_TILING       (1ull << 35)
-#define DBG_NO_TILING          (1ull << 36)
-#define DBG_SWITCH_ON_EOP      (1ull << 37)
-#define DBG_FORCE_DMA          (1ull << 38)
-#define DBG_PRECOMPILE         (1ull << 39)
-#define DBG_INFO               (1ull << 40)
-#define DBG_NO_WC              (1ull << 41)
-#define DBG_CHECK_VM           (1ull << 42)
-#define DBG_NO_DCC             (1ull << 43)
-#define DBG_NO_DCC_CLEAR       (1ull << 44)
-#define DBG_NO_RB_PLUS         (1ull << 45)
-#define DBG_SI_SCHED           (1ull << 46)
-#define DBG_MONOLITHIC_SHADERS (1ull << 47)
-#define DBG_NO_CE              (1ull << 48)
-#define DBG_UNSAFE_MATH                (1ull << 49)
-#define DBG_NO_DCC_FB          (1ull << 50)
-#define DBG_TEST_VMFAULT_CP    (1ull << 51)
-#define DBG_TEST_VMFAULT_SDMA  (1ull << 52)
-#define DBG_TEST_VMFAULT_SHADER        (1ull << 53)
+enum {
+       /* Shader logging options: */
+       DBG_VS = PIPE_SHADER_VERTEX,
+       DBG_PS = PIPE_SHADER_FRAGMENT,
+       DBG_GS = PIPE_SHADER_GEOMETRY,
+       DBG_TCS = PIPE_SHADER_TESS_CTRL,
+       DBG_TES = PIPE_SHADER_TESS_EVAL,
+       DBG_CS = PIPE_SHADER_COMPUTE,
+       DBG_NO_IR,
+       DBG_NO_TGSI,
+       DBG_NO_ASM,
+       DBG_PREOPT_IR,
+
+       /* Shader compiler options the shader cache should be aware of: */
+       DBG_FS_CORRECT_DERIVS_AFTER_KILL,
+       DBG_UNSAFE_MATH,
+       DBG_SI_SCHED,
+
+       /* Shader compiler options (with no effect on the shader cache): */
+       DBG_CHECK_IR,
+       DBG_NIR,
+       DBG_MONOLITHIC_SHADERS,
+       DBG_NO_OPT_VARIANT,
+
+       /* Information logging options: */
+       DBG_INFO,
+       DBG_TEX,
+       DBG_COMPUTE,
+       DBG_VM,
+
+       /* Driver options: */
+       DBG_FORCE_DMA,
+       DBG_NO_ASYNC_DMA,
+       DBG_NO_WC,
+       DBG_CHECK_VM,
+       DBG_RESERVE_VMID,
+
+       /* 3D engine options: */
+       DBG_SWITCH_ON_EOP,
+       DBG_NO_OUT_OF_ORDER,
+       DBG_NO_DPBB,
+       DBG_NO_DFSM,
+       DBG_DPBB,
+       DBG_DFSM,
+       DBG_NO_HYPERZ,
+       DBG_NO_RB_PLUS,
+       DBG_NO_2D_TILING,
+       DBG_NO_TILING,
+       DBG_NO_DCC,
+       DBG_NO_DCC_CLEAR,
+       DBG_NO_DCC_FB,
+       DBG_NO_DCC_MSAA,
+       DBG_DCC_MSAA,
+
+       /* Tests: */
+       DBG_TEST_DMA,
+       DBG_TEST_VMFAULT_CP,
+       DBG_TEST_VMFAULT_SDMA,
+       DBG_TEST_VMFAULT_SHADER,
+};
+
+#define DBG_ALL_SHADERS                (((1 << (DBG_CS + 1)) - 1))
+#define DBG(name)              (1ull << DBG_##name)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
-#define R600_MAX_VIEWPORTS        16
 
 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
 
-enum r600_coherency {
-       R600_COHERENCY_NONE, /* no cache flushes needed */
-       R600_COHERENCY_SHADER,
-       R600_COHERENCY_CB_META,
-};
-
-#ifdef PIPE_ARCH_BIG_ENDIAN
-#define R600_BIG_ENDIAN 1
-#else
-#define R600_BIG_ENDIAN 0
-#endif
-
 struct r600_common_context;
 struct r600_perfcounters;
 struct tgsi_shader_info;
 struct r600_qbo_state;
 
-void radeon_shader_binary_init(struct ac_shader_binary *b);
-void radeon_shader_binary_clean(struct ac_shader_binary *b);
-
 /* Only 32-bit buffer allocations are supported, gallium doesn't support more
  * at the moment.
  */
@@ -157,6 +149,7 @@ struct r600_resource {
        enum radeon_bo_domain           domains;
        enum radeon_bo_flag             flags;
        unsigned                        bind_history;
+       int                             max_forced_staging_uploads;
 
        /* The buffer range which is initialized (with a write transfer,
         * streamout, DMA, or as a random access target). The rest of
@@ -216,17 +209,9 @@ struct r600_cmask_info {
 struct r600_texture {
        struct r600_resource            resource;
 
+       struct radeon_surf              surface;
        uint64_t                        size;
-       unsigned                        num_level0_transfers;
-       enum pipe_format                db_render_format;
-       bool                            is_depth;
-       bool                            db_compatible;
-       bool                            can_sample_z;
-       bool                            can_sample_s;
-       unsigned                        dirty_level_mask; /* each bit says if that mipmap is compressed */
-       unsigned                        stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
        struct r600_texture             *flushed_depth_texture;
-       struct radeon_surf              surface;
 
        /* Colorbuffer compression and fast clear. */
        struct r600_fmask_info          fmask;
@@ -236,17 +221,37 @@ struct r600_texture {
        unsigned                        cb_color_info; /* fast clear enable bit */
        unsigned                        color_clear_value[2];
        unsigned                        last_msaa_resolve_target_micro_mode;
+       unsigned                        num_level0_transfers;
 
        /* Depth buffer compression and fast clear. */
        uint64_t                        htile_offset;
-       bool                            tc_compatible_htile;
-       bool                            depth_cleared; /* if it was cleared at least once */
        float                           depth_clear_value;
-       bool                            stencil_cleared; /* if it was cleared at least once */
+       uint16_t                        dirty_level_mask; /* each bit says if that mipmap is compressed */
+       uint16_t                        stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
+       enum pipe_format                db_render_format:16;
        uint8_t                         stencil_clear_value;
+       bool                            tc_compatible_htile:1;
+       bool                            depth_cleared:1; /* if it was cleared at least once */
+       bool                            stencil_cleared:1; /* if it was cleared at least once */
+       bool                            upgraded_depth:1; /* upgraded from unorm to Z32_FLOAT */
+       bool                            is_depth:1;
+       bool                            db_compatible:1;
+       bool                            can_sample_z:1;
+       bool                            can_sample_s:1;
 
-       bool                            non_disp_tiling; /* R600-Cayman only */
-
+       /* We need to track DCC dirtiness, because st/dri usually calls
+        * flush_resource twice per frame (not a bug) and we don't wanna
+        * decompress DCC twice. Also, the dirty tracking must be done even
+        * if DCC isn't used, because it's required by the DCC usage analysis
+        * for a possible future enablement.
+        */
+       bool                            separate_dcc_dirty:1;
+       /* Statistics gathering for the DCC enablement heuristic. */
+       bool                            dcc_gather_statistics:1;
+       /* Counter that should be non-zero if the texture is bound to a
+        * framebuffer.
+        */
+       unsigned                        framebuffers_bound;
        /* Whether the texture is a displayable back buffer and needs DCC
         * decompression, which is expensive. Therefore, it's enabled only
         * if statistics suggest that it will pay off and it's allocated
@@ -257,15 +262,6 @@ struct r600_texture {
        struct r600_resource            *dcc_separate_buffer;
        /* When DCC is temporarily disabled, the separate buffer is here. */
        struct r600_resource            *last_dcc_separate_buffer;
-       /* We need to track DCC dirtiness, because st/dri usually calls
-        * flush_resource twice per frame (not a bug) and we don't wanna
-        * decompress DCC twice. Also, the dirty tracking must be done even
-        * if DCC isn't used, because it's required by the DCC usage analysis
-        * for a possible future enablement.
-        */
-       bool                            separate_dcc_dirty;
-       /* Statistics gathering for the DCC enablement heuristic. */
-       bool                            dcc_gather_statistics;
        /* Estimate of how much this color buffer is written to in units of
         * full-screen draws: ps_invocations / (width * height)
         * Shader kills, late Z, and blending with trivial discards make it
@@ -274,67 +270,47 @@ struct r600_texture {
        unsigned                        ps_draw_ratio;
        /* The number of clears since the last DCC usage analysis. */
        unsigned                        num_slow_clears;
-
-       /* Counter that should be non-zero if the texture is bound to a
-        * framebuffer. Implemented in radeonsi only.
-        */
-       uint32_t                        framebuffers_bound;
 };
 
 struct r600_surface {
        struct pipe_surface             base;
 
        /* These can vary with block-compressed textures. */
-       unsigned width0;
-       unsigned height0;
+       uint16_t width0;
+       uint16_t height0;
 
-       bool color_initialized;
-       bool depth_initialized;
+       bool color_initialized:1;
+       bool depth_initialized:1;
 
        /* Misc. color flags. */
-       bool alphatest_bypass;
-       bool export_16bpc;
-       bool color_is_int8;
-       bool color_is_int10;
-       bool dcc_incompatible;
+       bool color_is_int8:1;
+       bool color_is_int10:1;
+       bool dcc_incompatible:1;
 
        /* Color registers. */
        unsigned cb_color_info;
-       unsigned cb_color_base;
        unsigned cb_color_view;
-       unsigned cb_color_size;         /* R600 only */
-       unsigned cb_color_dim;          /* EG only */
-       unsigned cb_color_pitch;        /* EG and later */
-       unsigned cb_color_slice;        /* EG and later */
-       unsigned cb_color_attrib;       /* EG and later */
+       unsigned cb_color_attrib;
        unsigned cb_color_attrib2;      /* GFX9 and later */
        unsigned cb_dcc_control;        /* VI and later */
-       unsigned cb_color_fmask;        /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
-       unsigned cb_color_fmask_slice;  /* EG and later */
-       unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
-       unsigned cb_color_mask;         /* R600 only */
-       unsigned spi_shader_col_format;         /* SI+, no blending, no alpha-to-coverage. */
-       unsigned spi_shader_col_format_alpha;   /* SI+, alpha-to-coverage */
-       unsigned spi_shader_col_format_blend;   /* SI+, blending without alpha. */
-       unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
-       struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
-       struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
+       unsigned spi_shader_col_format:8;       /* no blending, no alpha-to-coverage. */
+       unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */
+       unsigned spi_shader_col_format_blend:8; /* blending without alpha. */
+       unsigned spi_shader_col_format_blend_alpha:8; /* blending with alpha. */
 
        /* DB registers. */
-       uint64_t db_depth_base;         /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
-       uint64_t db_stencil_base;       /* EG and later */
+       uint64_t db_depth_base;         /* DB_Z_READ/WRITE_BASE */
+       uint64_t db_stencil_base;
        uint64_t db_htile_data_base;
-       unsigned db_depth_info;         /* R600 only, then SI and later */
-       unsigned db_z_info;             /* EG and later */
+       unsigned db_depth_info;
+       unsigned db_z_info;
        unsigned db_z_info2;            /* GFX9+ */
        unsigned db_depth_view;
        unsigned db_depth_size;
-       unsigned db_depth_slice;        /* EG and later */
-       unsigned db_stencil_info;       /* EG and later */
+       unsigned db_depth_slice;
+       unsigned db_stencil_info;
        unsigned db_stencil_info2;      /* GFX9+ */
-       unsigned db_prefetch_limit;     /* R600 only */
        unsigned db_htile_surface;
-       unsigned db_preload_control;    /* EG and later */
 };
 
 struct r600_mmio_counter {
@@ -371,161 +347,26 @@ union r600_mmio_counters {
                struct r600_mmio_counter meq;
                struct r600_mmio_counter me;
                struct r600_mmio_counter surf_sync;
-               struct r600_mmio_counter dma;
+               struct r600_mmio_counter cp_dma;
                struct r600_mmio_counter scratch_ram;
-               struct r600_mmio_counter ce;
        } named;
        unsigned array[0];
 };
 
-struct r600_common_screen {
-       struct pipe_screen              b;
-       struct radeon_winsys            *ws;
-       enum radeon_family              family;
-       enum chip_class                 chip_class;
-       struct radeon_info              info;
-       uint64_t                        debug_flags;
-       bool                            has_cp_dma;
-       bool                            has_streamout;
-       bool                            has_rbplus;     /* if RB+ registers exist */
-       bool                            rbplus_allowed; /* if RB+ is allowed */
-
-       struct disk_cache               *disk_shader_cache;
-
-       struct slab_parent_pool         pool_transfers;
-
-       /* Texture filter settings. */
-       int                             force_aniso; /* -1 = disabled */
-
-       /* Auxiliary context. Mainly used to initialize resources.
-        * It must be locked prior to using and flushed before unlocking. */
-       struct pipe_context             *aux_context;
-       mtx_t                           aux_context_lock;
-
-       /* This must be in the screen, because UE4 uses one context for
-        * compilation and another one for rendering.
-        */
-       unsigned                        num_compilations;
-       /* Along with ST_DEBUG=precompile, this should show if applications
-        * are loading shaders on demand. This is a monotonic counter.
-        */
-       unsigned                        num_shaders_created;
-       unsigned                        num_shader_cache_hits;
-
-       /* GPU load thread. */
-       mtx_t                           gpu_load_mutex;
-       thrd_t                          gpu_load_thread;
-       union r600_mmio_counters        mmio_counters;
-       volatile unsigned               gpu_load_stop_thread; /* bool */
-
-       char                            renderer_string[100];
-
-       /* Performance counters. */
-       struct r600_perfcounters        *perfcounters;
-
-       /* If pipe_screen wants to recompute and re-emit the framebuffer,
-        * sampler, and image states of all contexts, it should atomically
-        * increment this.
-        *
-        * Each context will compare this with its own last known value of
-        * the counter before drawing and re-emit the states accordingly.
-        */
-       unsigned                        dirty_tex_counter;
-
-       /* Atomically increment this counter when an existing texture's
-        * metadata is enabled or disabled in a way that requires changing
-        * contexts' compressed texture binding masks.
-        */
-       unsigned                        compressed_colortex_counter;
-
-       struct {
-               /* Context flags to set so that all writes from earlier jobs
-                * in the CP are seen by L2 clients.
-                */
-               unsigned cp_to_L2;
-
-               /* Context flags to set so that all writes from earlier
-                * compute jobs are seen by L2 clients.
-                */
-               unsigned compute_to_L2;
-       } barrier_flags;
-
-       void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
-                                     struct r600_texture *rtex,
-                                     struct radeon_bo_metadata *md);
-
-       void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
-                                   struct r600_texture *rtex,
-                                   struct radeon_bo_metadata *md);
+struct r600_memory_object {
+       struct pipe_memory_object       b;
+       struct pb_buffer                *buf;
+       uint32_t                        stride;
+       uint32_t                        offset;
 };
 
 /* This encapsulates a state or an operation which can emitted into the GPU
  * command stream. */
 struct r600_atom {
        void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
-       unsigned                num_dw;
        unsigned short          id;
 };
 
-struct r600_so_target {
-       struct pipe_stream_output_target b;
-
-       /* The buffer where BUFFER_FILLED_SIZE is stored. */
-       struct r600_resource    *buf_filled_size;
-       unsigned                buf_filled_size_offset;
-       bool                    buf_filled_size_valid;
-
-       unsigned                stride_in_dw;
-};
-
-struct r600_streamout {
-       struct r600_atom                begin_atom;
-       bool                            begin_emitted;
-       unsigned                        num_dw_for_end;
-
-       unsigned                        enabled_mask;
-       unsigned                        num_targets;
-       struct r600_so_target           *targets[PIPE_MAX_SO_BUFFERS];
-
-       unsigned                        append_bitmask;
-       bool                            suspended;
-
-       /* External state which comes from the vertex shader,
-        * it must be set explicitly when binding a shader. */
-       uint16_t                        *stride_in_dw;
-       unsigned                        enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
-
-       /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
-       unsigned                        hw_enabled_mask;
-
-       /* The state of VGT_STRMOUT_(CONFIG|EN). */
-       struct r600_atom                enable_atom;
-       bool                            streamout_enabled;
-       bool                            prims_gen_query_enabled;
-       int                             num_prims_gen_queries;
-};
-
-struct r600_signed_scissor {
-       int minx;
-       int miny;
-       int maxx;
-       int maxy;
-};
-
-struct r600_scissors {
-       struct r600_atom                atom;
-       unsigned                        dirty_mask;
-       struct pipe_scissor_state       states[R600_MAX_VIEWPORTS];
-};
-
-struct r600_viewports {
-       struct r600_atom                atom;
-       unsigned                        dirty_mask;
-       unsigned                        depth_range_dirty_mask;
-       struct pipe_viewport_state      states[R600_MAX_VIEWPORTS];
-       struct r600_signed_scissor      as_scissor[R600_MAX_VIEWPORTS];
-};
-
 struct r600_ring {
        struct radeon_winsys_cs         *cs;
        void (*flush)(void *ctx, unsigned flags,
@@ -544,7 +385,7 @@ struct radeon_saved_cs {
 struct r600_common_context {
        struct pipe_context b; /* base class */
 
-       struct r600_common_screen       *screen;
+       struct si_screen                *screen;
        struct radeon_winsys            *ws;
        struct radeon_winsys_ctx        *ctx;
        enum radeon_family              family;
@@ -553,11 +394,14 @@ struct r600_common_context {
        struct r600_ring                dma;
        struct pipe_fence_handle        *last_gfx_fence;
        struct pipe_fence_handle        *last_sdma_fence;
+       struct r600_resource            *eop_bug_scratch;
+       struct u_upload_mgr             *cached_gtt_allocator;
        unsigned                        num_gfx_cs_flushes;
        unsigned                        initial_gfx_cs_size;
        unsigned                        gpu_reset_counter;
        unsigned                        last_dirty_tex_counter;
        unsigned                        last_compressed_colortex_counter;
+       unsigned                        last_num_draw_calls;
 
        struct threaded_context         *tc;
        struct u_suballocator           *allocator_zeroed_memory;
@@ -568,15 +412,6 @@ struct r600_common_context {
        uint64_t                        vram;
        uint64_t                        gtt;
 
-       /* States. */
-       struct r600_streamout           streamout;
-       struct r600_scissors            scissors;
-       struct r600_viewports           viewports;
-       bool                            scissor_enabled;
-       bool                            clip_halfz;
-       bool                            vs_writes_viewport_index;
-       bool                            vs_disables_clipping_viewport;
-
        /* Additional context states. */
        unsigned flags; /* flush flags */
 
@@ -588,6 +423,7 @@ struct r600_common_context {
        unsigned                        num_cs_dw_queries_suspend;
        /* Misc stats. */
        unsigned                        num_draw_calls;
+       unsigned                        num_decompress_calls;
        unsigned                        num_mrt_draw_calls;
        unsigned                        num_prim_restart_calls;
        unsigned                        num_spill_draw_calls;
@@ -613,15 +449,6 @@ struct r600_common_context {
        bool                            render_cond_invert;
        bool                            render_cond_force_off; /* for u_blitter */
 
-       /* MSAA sample locations.
-        * The first index is the sample index.
-        * The second index is the coordinate: X, Y. */
-       float                           sample_locations_1x[1][2];
-       float                           sample_locations_2x[2][2];
-       float                           sample_locations_4x[4][2];
-       float                           sample_locations_8x[8][2];
-       float                           sample_locations_16x[16][2];
-
        /* Statistics gathering for the DCC enablement heuristic. It can't be
         * in r600_texture because r600_texture can be shared by multiple
         * contexts. This is for back buffers only. We shouldn't get too many
@@ -641,8 +468,8 @@ struct r600_common_context {
                bool                            query_active;
        } dcc_stats[5];
 
-       struct pipe_debug_callback      debug;
        struct pipe_device_reset_callback device_reset_callback;
+       struct u_log_context            *log;
 
        void                            *query_result_shader;
 
@@ -658,10 +485,6 @@ struct r600_common_context {
        void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                                 uint64_t offset, uint64_t size, unsigned value);
 
-       void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
-                            uint64_t offset, uint64_t size, unsigned value,
-                            enum r600_coherency coher);
-
        void (*blit_decompress_depth)(struct pipe_context *ctx,
                                      struct r600_texture *texture,
                                      struct r600_texture *staging,
@@ -683,7 +506,9 @@ struct r600_common_context {
                              uint64_t old_gpu_address);
 
        /* Enable or disable occlusion queries. */
-       void (*set_occlusion_query_state)(struct pipe_context *ctx, bool enable);
+       void (*set_occlusion_query_state)(struct pipe_context *ctx,
+                                         bool old_enable,
+                                         bool old_perfect_enable);
 
        void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);
 
@@ -700,129 +525,90 @@ struct r600_common_context {
 };
 
 /* r600_buffer_common.c */
-bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
-                                    struct pb_buffer *buf,
-                                    enum radeon_bo_usage usage);
-void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
-                                      struct r600_resource *resource,
-                                      unsigned usage);
-void r600_buffer_subdata(struct pipe_context *ctx,
-                        struct pipe_resource *buffer,
-                        unsigned usage, unsigned offset,
-                        unsigned size, const void *data);
-void r600_init_resource_fields(struct r600_common_screen *rscreen,
-                              struct r600_resource *res,
-                              uint64_t size, unsigned alignment);
-bool r600_alloc_resource(struct r600_common_screen *rscreen,
-                        struct r600_resource *res);
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
-                                        const struct pipe_resource *templ,
-                                        unsigned alignment);
-struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
-                                                 unsigned flags,
-                                                 unsigned usage,
-                                                 unsigned size,
-                                                 unsigned alignment);
-struct pipe_resource *
-r600_buffer_from_user_memory(struct pipe_screen *screen,
-                            const struct pipe_resource *templ,
-                            void *user_memory);
-void
-r600_invalidate_resource(struct pipe_context *ctx,
-                        struct pipe_resource *resource);
-void r600_replace_buffer_storage(struct pipe_context *ctx,
-                                struct pipe_resource *dst,
-                                struct pipe_resource *src);
+bool si_rings_is_buffer_referenced(struct r600_common_context *ctx,
+                                  struct pb_buffer *buf,
+                                  enum radeon_bo_usage usage);
+void *si_buffer_map_sync_with_rings(struct r600_common_context *ctx,
+                                   struct r600_resource *resource,
+                                   unsigned usage);
+void si_init_resource_fields(struct si_screen *sscreen,
+                            struct r600_resource *res,
+                            uint64_t size, unsigned alignment);
+bool si_alloc_resource(struct si_screen *sscreen,
+                      struct r600_resource *res);
+struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
+                                              unsigned flags,
+                                              unsigned usage,
+                                              unsigned size,
+                                              unsigned alignment);
+void si_replace_buffer_storage(struct pipe_context *ctx,
+                              struct pipe_resource *dst,
+                              struct pipe_resource *src);
+void si_init_screen_buffer_functions(struct si_screen *sscreen);
+void si_init_buffer_functions(struct si_context *sctx);
 
 /* r600_common_pipe.c */
-void r600_gfx_write_event_eop(struct r600_common_context *ctx,
-                             unsigned event, unsigned event_flags,
-                             unsigned data_sel,
-                             struct r600_resource *buf, uint64_t va,
-                             uint32_t old_fence, uint32_t new_fence);
-unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
-void r600_gfx_wait_fence(struct r600_common_context *ctx,
-                        uint64_t va, uint32_t ref, uint32_t mask);
-void r600_draw_rectangle(struct blitter_context *blitter,
-                        int x1, int y1, int x2, int y2, float depth,
-                        enum blitter_attrib_type type,
-                        const union pipe_color_union *attrib);
-bool r600_common_screen_init(struct r600_common_screen *rscreen,
-                            struct radeon_winsys *ws, unsigned flags);
-void r600_destroy_common_screen(struct r600_common_screen *rscreen);
-void r600_preflush_suspend_features(struct r600_common_context *ctx);
-void r600_postflush_resume_features(struct r600_common_context *ctx);
-bool r600_common_context_init(struct r600_common_context *rctx,
-                             struct r600_common_screen *rscreen,
-                             unsigned context_flags);
-void r600_common_context_cleanup(struct r600_common_context *rctx);
-bool r600_can_dump_shader(struct r600_common_screen *rscreen,
-                         unsigned processor);
-bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
-                             unsigned processor);
-void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-                             uint64_t offset, uint64_t size, unsigned value);
-struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
-                                                 const struct pipe_resource *templ);
-const char *r600_get_llvm_processor_name(enum radeon_family family);
-void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
-                        struct r600_resource *dst, struct r600_resource *src);
-void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
-                   struct radeon_saved_cs *saved, bool get_buffer_list);
-void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
-bool r600_check_device_reset(struct r600_common_context *rctx);
+void si_gfx_write_event_eop(struct r600_common_context *ctx,
+                           unsigned event, unsigned event_flags,
+                           unsigned data_sel,
+                           struct r600_resource *buf, uint64_t va,
+                           uint32_t new_fence, unsigned query_type);
+unsigned si_gfx_write_fence_dwords(struct si_screen *screen);
+void si_gfx_wait_fence(struct r600_common_context *ctx,
+                      uint64_t va, uint32_t ref, uint32_t mask);
+bool si_common_context_init(struct r600_common_context *rctx,
+                           struct si_screen *sscreen,
+                           unsigned context_flags);
+void si_common_context_cleanup(struct r600_common_context *rctx);
+void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst,
+                           uint64_t offset, uint64_t size, unsigned value);
+void si_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
+                      struct r600_resource *dst, struct r600_resource *src);
+void si_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
+               struct radeon_saved_cs *saved, bool get_buffer_list);
+void si_clear_saved_cs(struct radeon_saved_cs *saved);
+bool si_check_device_reset(struct r600_common_context *rctx);
 
 /* r600_gpu_load.c */
-void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
-uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
-unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
-                         uint64_t begin);
+void si_gpu_load_kill_thread(struct si_screen *sscreen);
+uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type);
+unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
+                       uint64_t begin);
 
 /* r600_perfcounters.c */
-void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
+void si_perfcounters_destroy(struct si_screen *sscreen);
 
 /* r600_query.c */
-void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
-void r600_query_init(struct r600_common_context *rctx);
-void r600_suspend_queries(struct r600_common_context *ctx);
-void r600_resume_queries(struct r600_common_context *ctx);
-void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
-
-/* r600_streamout.c */
-void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
-void r600_set_streamout_targets(struct pipe_context *ctx,
-                               unsigned num_targets,
-                               struct pipe_stream_output_target **targets,
-                               const unsigned *offset);
-void r600_emit_streamout_end(struct r600_common_context *rctx);
-void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
-                                            unsigned type, int diff);
-void r600_streamout_init(struct r600_common_context *rctx);
-
-/* r600_test_dma.c */
-void r600_test_dma(struct r600_common_screen *rscreen);
+void si_init_screen_query_functions(struct si_screen *sscreen);
+void si_init_query_functions(struct r600_common_context *rctx);
+void si_suspend_queries(struct r600_common_context *ctx);
+void si_resume_queries(struct r600_common_context *ctx);
 
 /* r600_texture.c */
-bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
-                               struct r600_texture *rdst,
-                               unsigned dst_level, unsigned dstx,
-                               unsigned dsty, unsigned dstz,
-                               struct r600_texture *rsrc,
-                               unsigned src_level,
-                               const struct pipe_box *src_box);
-void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
-                                struct r600_texture *rtex,
-                                unsigned nr_samples,
-                                struct r600_fmask_info *out);
-void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
-                                struct r600_texture *rtex,
-                                struct r600_cmask_info *out);
-bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
-                                    struct pipe_resource *texture,
-                                    struct r600_texture **staging);
-void r600_print_texture_info(struct r600_common_screen *rscreen,
-                            struct r600_texture *rtex, FILE *f);
-struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
+bool si_prepare_for_dma_blit(struct r600_common_context *rctx,
+                            struct r600_texture *rdst,
+                            unsigned dst_level, unsigned dstx,
+                            unsigned dsty, unsigned dstz,
+                            struct r600_texture *rsrc,
+                            unsigned src_level,
+                            const struct pipe_box *src_box);
+void si_texture_get_fmask_info(struct si_screen *sscreen,
+                              struct r600_texture *rtex,
+                              unsigned nr_samples,
+                              struct r600_fmask_info *out);
+void si_texture_get_cmask_info(struct si_screen *sscreen,
+                              struct r600_texture *rtex,
+                              struct r600_cmask_info *out);
+void si_eliminate_fast_color_clear(struct r600_common_context *rctx,
+                                  struct r600_texture *rtex);
+void si_texture_discard_cmask(struct si_screen *sscreen,
+                             struct r600_texture *rtex);
+bool si_init_flushed_depth_texture(struct pipe_context *ctx,
+                                  struct pipe_resource *texture,
+                                  struct r600_texture **staging);
+void si_print_texture_info(struct si_screen *sscreen,
+                          struct r600_texture *rtex, struct u_log_context *log);
+struct pipe_resource *si_texture_create(struct pipe_screen *screen,
                                        const struct pipe_resource *templ);
 bool vi_dcc_formats_compatible(enum pipe_format format1,
                               enum pipe_format format2);
@@ -833,52 +619,24 @@ void vi_disable_dcc_if_incompatible_format(struct r600_common_context *rctx,
                                           struct pipe_resource *tex,
                                           unsigned level,
                                           enum pipe_format view_format);
-struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
-                                               struct pipe_resource *texture,
-                                               const struct pipe_surface *templ,
-                                               unsigned width0, unsigned height0,
-                                               unsigned width, unsigned height);
-unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
+struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
+                                             struct pipe_resource *texture,
+                                             const struct pipe_surface *templ,
+                                             unsigned width0, unsigned height0,
+                                             unsigned width, unsigned height);
+unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
+void vi_separate_dcc_try_enable(struct r600_common_context *rctx,
+                               struct r600_texture *tex);
 void vi_separate_dcc_start_query(struct pipe_context *ctx,
                                 struct r600_texture *tex);
 void vi_separate_dcc_stop_query(struct pipe_context *ctx,
                                struct r600_texture *tex);
 void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
                                             struct r600_texture *tex);
-void vi_dcc_clear_level(struct r600_common_context *rctx,
-                       struct r600_texture *rtex,
-                       unsigned level, unsigned clear_value);
-void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
-                                  struct pipe_framebuffer_state *fb,
-                                  struct r600_atom *fb_state,
-                                  unsigned *buffers, ubyte *dirty_cbufs,
-                                  const union pipe_color_union *color);
-bool r600_texture_disable_dcc(struct r600_common_context *rctx,
-                             struct r600_texture *rtex);
-void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
-void r600_init_context_texture_functions(struct r600_common_context *rctx);
-
-/* r600_viewport.c */
-void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
-                                           struct pipe_scissor_state *scissor);
-void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
-                                bool scissor_enable, bool clip_halfz);
-void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
-                                         struct tgsi_shader_info *info);
-void r600_init_viewport_functions(struct r600_common_context *rctx);
-
-/* cayman_msaa.c */
-extern const uint32_t eg_sample_locs_2x[4];
-extern const unsigned eg_max_dist_2x;
-extern const uint32_t eg_sample_locs_4x[4];
-extern const unsigned eg_max_dist_4x;
-void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
-                               unsigned sample_index, float *out_value);
-void cayman_init_msaa(struct pipe_context *ctx);
-void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
-void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
-                            int ps_iter_samples, int overrast_samples,
-                            unsigned sc_mode_cntl_1);
+bool si_texture_disable_dcc(struct r600_common_context *rctx,
+                           struct r600_texture *rtex);
+void si_init_screen_texture_functions(struct si_screen *sscreen);
+void si_init_context_texture_functions(struct r600_common_context *rctx);
 
 
 /* Inline helpers. */
@@ -901,116 +659,13 @@ r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
        pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
 }
 
-static inline void
-r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
-{
-       struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-       struct r600_resource *res = (struct r600_resource *)r;
-
-       if (res) {
-               /* Add memory usage for need_gfx_cs_space */
-               rctx->vram += res->vram_usage;
-               rctx->gtt += res->gart_usage;
-       }
-}
-
-static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
-{
-       return rctx->streamout.streamout_enabled ||
-              rctx->streamout.prims_gen_query_enabled;
-}
-
-#define     SQ_TEX_XY_FILTER_POINT                         0x00
-#define     SQ_TEX_XY_FILTER_BILINEAR                      0x01
-#define     SQ_TEX_XY_FILTER_ANISO_POINT                   0x02
-#define     SQ_TEX_XY_FILTER_ANISO_BILINEAR                0x03
-
-static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
-{
-       if (filter == PIPE_TEX_FILTER_LINEAR)
-               return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
-                                    : SQ_TEX_XY_FILTER_BILINEAR;
-       else
-               return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
-                                    : SQ_TEX_XY_FILTER_POINT;
-}
-
-static inline unsigned r600_tex_aniso_filter(unsigned filter)
-{
-       if (filter < 2)
-               return 0;
-       if (filter < 4)
-               return 1;
-       if (filter < 8)
-               return 2;
-       if (filter < 16)
-               return 3;
-       return 4;
-}
-
-static inline unsigned r600_wavefront_size(enum radeon_family family)
-{
-       switch (family) {
-       case CHIP_RV610:
-       case CHIP_RS780:
-       case CHIP_RV620:
-       case CHIP_RS880:
-               return 16;
-       case CHIP_RV630:
-       case CHIP_RV635:
-       case CHIP_RV730:
-       case CHIP_RV710:
-       case CHIP_PALM:
-       case CHIP_CEDAR:
-               return 32;
-       default:
-               return 64;
-       }
-}
-
-static inline enum radeon_bo_priority
-r600_get_sampler_view_priority(struct r600_resource *res)
-{
-       if (res->b.b.target == PIPE_BUFFER)
-               return RADEON_PRIO_SAMPLER_BUFFER;
-
-       if (res->b.b.nr_samples > 1)
-               return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
-
-       return RADEON_PRIO_SAMPLER_TEXTURE;
-}
-
-static inline bool
-r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
-{
-       return (stencil_sampler && tex->can_sample_s) ||
-              (!stencil_sampler && tex->can_sample_z);
-}
-
 static inline bool
 vi_dcc_enabled(struct r600_texture *tex, unsigned level)
 {
        return tex->dcc_offset && level < tex->surface.num_dcc_levels;
 }
 
-#define COMPUTE_DBG(rscreen, fmt, args...) \
-       do { \
-               if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
-       } while (0);
-
 #define R600_ERR(fmt, args...) \
        fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
 
-/* For MSAA sample positions. */
-#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
-       (((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |                  \
-       (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |         \
-       (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |        \
-        (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
-
-static inline int S_FIXED(float value, unsigned frac_bits)
-{
-       return value * (1 << frac_bits);
-}
-
 #endif