vc4: Add support for 16-bit signed/unsigned norm/scaled vertex attrs.
[mesa.git] / src / gallium / drivers / r600 / r600_pipe.h
index 541d42e798315af30d5be20c7c4024da315f3d42..46b0a2de07d010a0c2321bc8f0cd0aececd04143 100644 (file)
 #ifndef R600_PIPE_H
 #define R600_PIPE_H
 
-#include "util/u_blitter.h"
-#include "util/u_slab.h"
-#include "util/u_suballoc.h"
-#include "util/u_double_list.h"
-#include "util/u_transfer.h"
+#include "radeon/r600_pipe_common.h"
+#include "radeon/r600_cs.h"
+
 #include "r600_llvm.h"
 #include "r600_public.h"
-#include "r600_resource.h"
 
-#define R600_NUM_ATOMS 40
+#include "util/u_suballoc.h"
+#include "util/u_double_list.h"
+#include "util/u_transfer.h"
 
-#define R600_TRACE_CS 0
+#define R600_NUM_ATOMS 73
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS       16
-#define R600_MAX_DRAW_CS_DWORDS                34
+#define R600_MAX_DRAW_CS_DWORDS                40
 #define R600_TRACE_CS_DWORDS           7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
 
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
+ * of 16 const buffers.
+ * UCP/SAMPLE_POSITIONS are never accessed by the same shader stage, so they can use the same id.
+ *
+ * In order to support d3d 11 mandated minimum of 15 user const buffers
+ * we'd have to squash all use cases into one driver buffer.
+ */
+#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 
-#define R600_MAX_CONST_BUFFER_SIZE 4096
+#define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
 #define R600_BIG_ENDIAN 1
 #define R600_BIG_ENDIAN 0
 #endif
 
-#define R600_MAP_BUFFER_ALIGNMENT 64
-
-#define R600_ERR(fmt, args...) \
-       fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
-
-#define R600_CONTEXT_INVAL_READ_CACHES         (1 << 0)
-#define R600_CONTEXT_STREAMOUT_FLUSH           (1 << 1)
-#define R600_CONTEXT_WAIT_3D_IDLE              (1 << 2)
-#define R600_CONTEXT_WAIT_CP_DMA_IDLE          (1 << 3)
-#define R600_CONTEXT_FLUSH_AND_INV             (1 << 4)
-#define R600_CONTEXT_FLUSH_AND_INV_CB_META     (1 << 5)
-#define R600_CONTEXT_PS_PARTIAL_FLUSH          (1 << 6)
-#define R600_CONTEXT_FLUSH_AND_INV_DB_META      (1 << 7)
-
-#define R600_QUERY_DRAW_CALLS          (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM      (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT       (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-
 struct r600_context;
 struct r600_bytecode;
 struct r600_shader_key;
 
-/* This encapsulates a state or an operation which can emitted into the GPU
- * command stream. It's not limited to states only, it can be used for anything
- * that wants to write commands into the CS (e.g. cache flushes). */
-struct r600_atom {
-       void (*emit)(struct r600_context *ctx, struct r600_atom *state);
-       unsigned                id;
-       unsigned                num_dw;
-       bool                    dirty;
-};
-
 /* This is an atom containing GPU commands that never change.
  * This is supposed to be copied directly into the CS. */
 struct r600_command_buffer {
@@ -135,6 +114,7 @@ struct r600_clip_misc_state {
        unsigned pa_cl_vs_out_cntl; /* from vertex shader */
        unsigned clip_plane_enable; /* from rasterizer    */
        unsigned clip_dist_write;   /* from vertex shader */
+       boolean clip_disable;       /* from vertex shader */
 };
 
 struct r600_alphatest_state {
@@ -165,6 +145,7 @@ struct r600_clip_state {
 struct r600_cs_shader_state {
        struct r600_atom atom;
        unsigned kernel_index;
+       unsigned pc;
        struct r600_pipe_compute *shader;
 };
 
@@ -186,6 +167,7 @@ struct r600_sample_mask {
 struct r600_config_state {
        struct r600_atom atom;
        unsigned sq_gpr_resource_mgmt_1;
+       unsigned sq_gpr_resource_mgmt_2;
 };
 
 struct r600_stencil_ref
@@ -204,94 +186,50 @@ struct r600_stencil_ref_state {
 struct r600_viewport_state {
        struct r600_atom atom;
        struct pipe_viewport_state state;
+       int idx; /* NOTE(review): presumably this entry's slot in r600_context's viewport[16] array -- confirm */
 };
 
-struct r600_pipe_fences {
-       struct r600_resource            *bo;
-       unsigned                        *data;
-       unsigned                        next_index;
-       /* linked list of preallocated blocks */
-       struct list_head                blocks;
-       /* linked list of freed fences */
-       struct list_head                pool;
-       pipe_mutex                      mutex;
+struct r600_shader_stages_state { /* State object carried by an r600_atom; NOTE(review): presumably records which shader stages are active -- confirm */
+       struct r600_atom atom;
+       unsigned geom_enable; /* NOTE(review): presumably nonzero while a geometry shader is in use -- confirm */
 };
 
-enum r600_msaa_texture_mode {
-       /* If the hw can fetch the first sample only (no decompression available).
-        * This means MSAA texturing is not fully implemented. */
-       MSAA_TEXTURE_SAMPLE_ZERO,
-
-       /* If the hw can fetch decompressed MSAA textures.
-        * Supported families: R600, R700, Evergreen.
-        * Cayman cannot use this, because it cannot do the decompression. */
-       MSAA_TEXTURE_DECOMPRESSED,
-
-       /* If the hw can fetch compressed MSAA textures, which means shaders can
-        * read resolved FMASK. This yields the best performance.
-        * Supported families: Evergreen, Cayman. */
-       MSAA_TEXTURE_COMPRESSED
+struct r600_gs_rings_state { /* State object carried by an r600_atom, holding two ring buffers described as constant buffers */
+       struct r600_atom atom;
+       unsigned enable; /* NOTE(review): presumably whether the rings are currently in use -- confirm */
+       struct pipe_constant_buffer esgs_ring; /* NOTE(review): presumably the ES -> GS ring -- confirm */
+       struct pipe_constant_buffer gsvs_ring; /* NOTE(review): presumably the GS -> VS ring -- confirm */
 };
 
-typedef boolean (*r600g_dma_blit_t)(struct pipe_context *ctx,
-                               struct pipe_resource *dst,
-                               unsigned dst_level,
-                               unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                               struct pipe_resource *src,
-                               unsigned src_level,
-                               const struct pipe_box *src_box);
-
-/* logging */
-#define DBG_TEX_DEPTH          (1 << 0)
-#define DBG_COMPUTE            (1 << 1)
-/* shaders */
-#define DBG_FS                 (1 << 8)
-#define DBG_VS                 (1 << 9)
-#define DBG_GS                 (1 << 10)
-#define DBG_PS                 (1 << 11)
-#define DBG_CS                 (1 << 12)
+/* Driver-specific debug flag bits. This must start from 16.
+ * NOTE(review): presumably bits 0-15 are reserved for debug flags shared
+ * through radeon/r600_pipe_common.h -- confirm. */
 /* features */
-#define DBG_NO_HYPERZ          (1 << 16)
-#define DBG_NO_LLVM            (1 << 17)
-#define DBG_NO_CP_DMA          (1 << 18)
-#define DBG_NO_ASYNC_DMA       (1 << 19)
-#define DBG_NO_DISCARD_RANGE   (1 << 20)
-
-struct r600_tiling_info {
-       unsigned num_channels;
-       unsigned num_banks;
-       unsigned group_bytes;
-};
+#define DBG_LLVM               (1 << 29)
+#define DBG_NO_CP_DMA          (1 << 30)
+/* shader backend */
+#define DBG_NO_SB              (1 << 21)
+#define DBG_SB_CS              (1 << 22)
+#define DBG_SB_DRY_RUN (1 << 23)
+#define DBG_SB_STAT            (1 << 24)
+#define DBG_SB_DUMP            (1 << 25)
+#define DBG_SB_NO_FALLBACK     (1 << 26)
+#define DBG_SB_DISASM  (1 << 27)
+#define DBG_SB_SAFEMATH        (1 << 28)
 
 struct r600_screen {
-       struct pipe_screen              screen;
-       struct radeon_winsys            *ws;
-       unsigned                        debug_flags;
-       unsigned                        family;
-       enum chip_class                 chip_class;
-       struct radeon_info              info;
-       bool                            has_streamout;
+       struct r600_common_screen       b; /* embedded common screen; other code reaches its fields as screen->b.<field> (e.g. b.trace_bo in r600_emit_atom) */
        bool                            has_msaa;
-       bool                            has_cp_dma;
-       enum r600_msaa_texture_mode     msaa_texture_support;
-       struct r600_tiling_info         tiling_info;
-       struct r600_pipe_fences         fences;
+       bool                            has_compressed_msaa_texturing; /* NOTE(review): appears to replace the removed msaa_texture_support enum with a single capability flag -- confirm */
 
        /*for compute global memory binding, we allocate stuff here, instead of
         * buffers.
         * XXX: Not sure if this is the best place for global_pool.  Also,
         * it's not thread safe, so it won't work with multiple contexts. */
        struct compute_memory_pool *global_pool;
-#if R600_TRACE_CS
-       struct r600_resource            *trace_bo;
-       uint32_t                        *trace_ptr;
-       unsigned                        cs_count;
-#endif
-       r600g_dma_blit_t                dma_blit;
 };
 
 struct r600_pipe_sampler_view {
        struct pipe_sampler_view        base;
+       struct list_head                list;
        struct r600_resource            *tex_resource;
        uint32_t                        tex_resource_words[8];
        bool                            skip_mip_address_reloc;
@@ -305,6 +243,7 @@ struct r600_rasterizer_state {
        unsigned                        clip_plane_enable;
        unsigned                        pa_sc_line_stipple;
        unsigned                        pa_cl_clip_cntl;
+       unsigned                        pa_su_sc_mode_cntl;
        float                           offset_units;
        float                           offset_scale;
        bool                            offset_enable;
@@ -376,7 +315,6 @@ struct r600_samplerview_state {
        uint32_t                        dirty_mask;
        uint32_t                        compressed_depthtex_mask; /* which textures are depth */
        uint32_t                        compressed_colortex_mask;
-       boolean                         dirty_txq_constants;
        boolean                         dirty_buffer_constants;
 };
 
@@ -399,23 +337,6 @@ struct r600_textures_info {
        uint32_t                        *buffer_constants;
 };
 
-struct r600_fence {
-       struct pipe_reference           reference;
-       unsigned                        index; /* in the shared bo */
-       struct r600_resource            *sleep_bo;
-       struct list_head                head;
-};
-
-#define FENCE_BLOCK_SIZE 16
-
-struct r600_fence_block {
-       struct r600_fence               fences[FENCE_BLOCK_SIZE];
-       struct list_head                head;
-};
-
-#define R600_CONSTANT_ARRAY_SIZE 256
-#define R600_RESOURCE_ARRAY_SIZE 160
-
 struct r600_constbuf_state
 {
        struct r600_atom                atom;
@@ -445,6 +366,7 @@ struct r600_scissor_state
        struct r600_atom                atom;
        struct pipe_scissor_state       scissor;
        bool                            enable; /* r6xx only */
+       int idx;
 };
 
 struct r600_fetch_shader {
@@ -454,102 +376,26 @@ struct r600_fetch_shader {
 
 struct r600_shader_state {
        struct r600_atom                atom;
-       struct r600_pipe_shader_selector *shader;
-};
-
-struct r600_query_buffer {
-       /* The buffer where query results are stored. */
-       struct r600_resource                    *buf;
-       /* Offset of the next free result after current query data */
-       unsigned                                results_end;
-       /* If a query buffer is full, a new buffer is created and the old one
-        * is put in here. When we calculate the result, we sum up the samples
-        * from all buffers. */
-       struct r600_query_buffer                *previous;
-};
-
-struct r600_query {
-       /* The query buffer and how many results are in it. */
-       struct r600_query_buffer                buffer;
-       /* The type of query */
-       unsigned                                type;
-       /* Size of the result in memory for both begin_query and end_query,
-        * this can be one or two numbers, or it could even be a size of a structure. */
-       unsigned                                result_size;
-       /* The number of dwords for begin_query or end_query. */
-       unsigned                                num_cs_dw;
-       /* linked list of queries */
-       struct list_head                        list;
-       /* for custom non-GPU queries */
-       uint64_t begin_result;
-       uint64_t end_result;
-};
-
-struct r600_so_target {
-       struct pipe_stream_output_target b;
-
-       /* The buffer where BUFFER_FILLED_SIZE is stored. */
-       struct r600_resource    *buf_filled_size;
-       unsigned                buf_filled_size_offset;
-
-       unsigned                stride_in_dw;
-       unsigned                so_index;
-};
-
-struct r600_streamout {
-       struct r600_atom                begin_atom;
-       bool                            begin_emitted;
-       unsigned                        num_dw_for_end;
-
-       unsigned                        enabled_mask;
-       unsigned                        num_targets;
-       struct r600_so_target           *targets[PIPE_MAX_SO_BUFFERS];
-
-       unsigned                        append_bitmask;
-       bool                            suspended;
-};
-
-struct r600_ring {
-       struct radeon_winsys_cs         *cs;
-       bool                            flushing;
-       void (*flush)(void *ctx, unsigned flags);
-};
-
-struct r600_rings {
-       struct r600_ring                gfx;
-       struct r600_ring                dma;
+       struct r600_pipe_shader *shader;
 };
 
 struct r600_context {
-       struct pipe_context             context;
+       struct r600_common_context      b;
        struct r600_screen              *screen;
-       struct radeon_winsys            *ws;
-       struct r600_rings               rings;
        struct blitter_context          *blitter;
-       struct u_upload_mgr             *uploader;
-       struct u_suballocator           *allocator_so_filled_size;
        struct u_suballocator           *allocator_fetch_shader;
-       struct util_slab_mempool        pool_transfers;
 
        /* Hardware info. */
-       enum radeon_family              family;
-       enum chip_class                 chip_class;
        boolean                         has_vertex_cache;
        boolean                         keep_tiling_flags;
        unsigned                        default_ps_gprs, default_vs_gprs;
        unsigned                        r6xx_num_clause_temp_gprs;
-       unsigned                        backend_mask;
-       unsigned                        max_db; /* for OQ */
-
-       /* current unaccounted memory usage */
-       uint64_t                        vram;
-       uint64_t                        gtt;
 
        /* Miscellaneous state objects. */
        void                            *custom_dsa_flush;
        void                            *custom_blend_resolve;
        void                            *custom_blend_decompress;
-       void                            *custom_blend_fmask_decompress;
+       void                            *custom_blend_fastclear;
        /* With rasterizer discard, there doesn't have to be a pixel shader.
         * In that case, we bind this one: */
        void                            *dummy_pixel_shader;
@@ -581,35 +427,39 @@ struct r600_context {
        struct r600_poly_offset_state   poly_offset_state;
        struct r600_cso_state           rasterizer_state;
        struct r600_sample_mask         sample_mask;
-       struct r600_scissor_state       scissor;
+       struct r600_scissor_state       scissor[16];
        struct r600_seamless_cube_map   seamless_cube_map;
        struct r600_config_state        config_state;
        struct r600_stencil_ref_state   stencil_ref;
        struct r600_vgt_state           vgt_state;
-       struct r600_viewport_state      viewport;
+       struct r600_viewport_state      viewport[16];
        /* Shaders and shader resources. */
        struct r600_cso_state           vertex_fetch_shader;
        struct r600_shader_state        vertex_shader;
        struct r600_shader_state        pixel_shader;
+       struct r600_shader_state        geometry_shader;
+       struct r600_shader_state        export_shader;
        struct r600_cs_shader_state     cs_shader_state;
+       struct r600_shader_stages_state shader_stages;
+       struct r600_gs_rings_state      gs_rings;
        struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];
        struct r600_textures_info       samplers[PIPE_SHADER_TYPES];
        /** Vertex buffers for fetch shaders */
        struct r600_vertexbuf_state     vertex_buffer_state;
        /** Vertex buffers for compute shaders */
        struct r600_vertexbuf_state     cs_vertex_buffer_state;
-       struct r600_streamout           streamout;
 
        /* Additional context states. */
-       unsigned                        flags;
        unsigned                        compute_cb_target_mask;
        struct r600_pipe_shader_selector *ps_shader;
        struct r600_pipe_shader_selector *vs_shader;
+       struct r600_pipe_shader_selector *gs_shader;
        struct r600_rasterizer_state    *rasterizer;
        bool                            alpha_to_one;
        bool                            force_blend_disable;
        boolean                         dual_src_blend;
        unsigned                        zwritemask;
+       int                                     ps_iter_samples;
 
        /* Index buffer. */
        struct pipe_index_buffer        index_buffer;
@@ -618,23 +468,7 @@ struct r600_context {
        int                             last_primitive_type; /* Last primitive type used in draw_vbo. */
        int                             last_start_instance;
 
-       /* Queries. */
-       /* The list of active queries. Only one query of each type can be active. */
-       int                             num_occlusion_queries;
-       /* Keep track of non-timer queries, because they should be suspended
-        * during context flushing.
-        * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
-       struct list_head                active_nontimer_queries;
-       unsigned                        num_cs_dw_nontimer_queries_suspend;
-       /* If queries have been suspended. */
-       bool                            nontimer_queries_suspended;
-       unsigned                        num_draw_calls;
-
-       /* Render condition. */
-       struct pipe_query               *current_render_cond;
-       unsigned                        current_render_cond_mode;
-       boolean                         predicate_drawing;
-
+       void                            *sb_context;
        struct r600_isa         *isa;
 };
 
@@ -646,19 +480,15 @@ static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
        cs->cdw += cb->num_dw;
 }
 
-#if R600_TRACE_CS
 void r600_trace_emit(struct r600_context *rctx);
-#endif
 
 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
 {
-       atom->emit(rctx, atom);
+       atom->emit(&rctx->b, atom); /* the emit hook is now handed the embedded common context (member b) instead of rctx itself */
        atom->dirty = false;
-#if R600_TRACE_CS
-       if (rctx->screen->trace_bo) {
+       if (rctx->screen->b.trace_bo) { /* tracing is now a runtime check: emit a trace record only when the screen has a trace buffer */
                r600_trace_emit(rctx);
        }
-#endif
 }
 
 static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
@@ -671,7 +501,7 @@ static INLINE void r600_set_cso_state_with_cb(struct r600_cso_state *state, void
                                              struct r600_command_buffer *cb)
 {
        state->cb = cb;
-       state->atom.num_dw = cb->num_dw;
+       state->atom.num_dw = cb ? cb->num_dw : 0;
        r600_set_cso_state(state, cso);
 }
 
@@ -681,12 +511,18 @@ void compute_memory_pool_delete(struct compute_memory_pool* pool);
 struct compute_memory_pool* compute_memory_pool_new(
        struct r600_screen *rscreen);
 
+/* evergreen_compute.c */
+void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
+                                   unsigned start_slot, unsigned count,
+                                   struct pipe_sampler_view **views);
+
 /* evergreen_state.c */
 struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     const struct pipe_sampler_view *state,
-                                    unsigned width0, unsigned height0);
+                                    unsigned width0, unsigned height0,
+                                    unsigned force_level);
 void evergreen_init_common_regs(struct r600_command_buffer *cb,
                                enum chip_class ctx_chip_class,
                                enum radeon_family ctx_family,
@@ -699,11 +535,13 @@ void cayman_init_common_regs(struct r600_command_buffer *cb,
 void evergreen_init_state_functions(struct r600_context *rctx);
 void evergreen_init_atom_start_cs(struct r600_context *rctx);
 void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
 void *evergreen_create_resolve_blend(struct r600_context *rctx);
 void *evergreen_create_decompress_blend(struct r600_context *rctx);
-void *evergreen_create_fmask_decompress_blend(struct r600_context *rctx);
+void *evergreen_create_fastclear_blend(struct r600_context *rctx);
 boolean evergreen_is_format_supported(struct pipe_screen *screen,
                                      enum pipe_format format,
                                      enum pipe_texture_target target,
@@ -716,53 +554,24 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
 void evergreen_update_db_shader_control(struct r600_context * rctx);
 
 /* r600_blit.c */
-void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx,
-                     struct pipe_resource *src, const struct pipe_box *src_box);
 void r600_init_blit_functions(struct r600_context *rctx);
-void r600_blit_decompress_depth(struct pipe_context *ctx,
-               struct r600_texture *texture,
-               struct r600_texture *staging,
-               unsigned first_level, unsigned last_level,
-               unsigned first_layer, unsigned last_layer,
-               unsigned first_sample, unsigned last_sample);
 void r600_decompress_depth_textures(struct r600_context *rctx,
                                    struct r600_samplerview_state *textures);
 void r600_decompress_color_textures(struct r600_context *rctx,
                                    struct r600_samplerview_state *textures);
-
-/* r600_buffer.c */
-bool r600_init_resource(struct r600_screen *rscreen,
-                       struct r600_resource *res,
-                       unsigned size, unsigned alignment,
-                       bool use_reusable_pool, unsigned usage);
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
-                                        const struct pipe_resource *templ,
-                                        unsigned alignment);
-
-/* r600_pipe.c */
-boolean r600_rings_is_buffer_referenced(struct r600_context *ctx,
-                                       struct radeon_winsys_cs_handle *buf,
-                                       enum radeon_bo_usage usage);
-void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx,
-                                       struct r600_resource *resource,
-                                       unsigned usage);
-
-/* r600_query.c */
-void r600_init_query_functions(struct r600_context *rctx);
-void r600_suspend_nontimer_queries(struct r600_context *ctx);
-void r600_resume_nontimer_queries(struct r600_context *ctx);
-
-/* r600_resource.c */
-void r600_init_context_resource_functions(struct r600_context *r600);
+void r600_resource_copy_region(struct pipe_context *ctx,
+                              struct pipe_resource *dst,
+                              unsigned dst_level,
+                              unsigned dstx, unsigned dsty, unsigned dstz,
+                              struct pipe_resource *src,
+                              unsigned src_level,
+                              const struct pipe_box *src_box);
 
 /* r600_shader.c */
 int r600_pipe_shader_create(struct pipe_context *ctx,
                            struct r600_pipe_shader *shader,
                            struct r600_shader_key key);
-#ifdef HAVE_OPENCL
-int r600_compute_shader_create(struct pipe_context * ctx,
-       LLVMModuleRef mod,  struct r600_bytecode * bytecode);
-#endif
+
 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 
 /* r600_state.c */
@@ -774,6 +583,8 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
 void r600_init_state_functions(struct r600_context *rctx);
 void r600_init_atom_start_cs(struct r600_context *rctx);
 void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void *r600_create_db_flush_dsa(struct r600_context *rctx);
 void *r600_create_resolve_blend(struct r600_context *rctx);
@@ -787,70 +598,35 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
                                 unsigned usage);
 void r600_update_db_shader_control(struct r600_context * rctx);
 
-/* r600_texture.c */
-void r600_init_screen_texture_functions(struct pipe_screen *screen);
-void r600_init_surface_functions(struct r600_context *r600);
-uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
-                                 const unsigned char *swizzle_view,
-                                 uint32_t *word4_p, uint32_t *yuv_format_p);
-unsigned r600_texture_get_offset(struct r600_texture *rtex,
-                                       unsigned level, unsigned layer);
-struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
-                                               struct pipe_resource *texture,
-                                               const struct pipe_surface *templ,
-                                               unsigned width, unsigned height);
-
-unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
-                                  const unsigned char *swizzle_view,
-                                  boolean vtx);
-
 /* r600_hw_context.c */
-void r600_get_backend_mask(struct r600_context *ctx);
-void r600_context_flush(struct r600_context *ctx, unsigned flags);
+void r600_context_gfx_flush(void *context, unsigned flags,
+                           struct pipe_fence_handle **fence);
 void r600_begin_new_cs(struct r600_context *ctx);
-void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence,
-                             unsigned offset, unsigned value);
 void r600_flush_emit(struct r600_context *ctx);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
-void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw);
 void r600_cp_dma_copy_buffer(struct r600_context *rctx,
                             struct pipe_resource *dst, uint64_t dst_offset,
                             struct pipe_resource *src, uint64_t src_offset,
                             unsigned size);
-void r600_dma_copy(struct r600_context *rctx,
-               struct pipe_resource *dst,
-               struct pipe_resource *src,
-               uint64_t dst_offset,
-               uint64_t src_offset,
-               uint64_t size);
-boolean r600_dma_blit(struct pipe_context *ctx,
-                       struct pipe_resource *dst,
-                       unsigned dst_level,
-                       unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                       struct pipe_resource *src,
-                       unsigned src_level,
-                       const struct pipe_box *src_box);
-void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
-void r600_emit_streamout_end(struct r600_context *ctx);
+void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
+                                  struct pipe_resource *dst, uint64_t offset,
+                                  unsigned size, uint32_t clear_value);
+void r600_dma_copy_buffer(struct r600_context *rctx,
+                         struct pipe_resource *dst,
+                         struct pipe_resource *src,
+                         uint64_t dst_offset,
+                         uint64_t src_offset,
+                         uint64_t size);
 
 /*
  * evergreen_hw_context.c
  */
-void evergreen_flush_vgt_streamout(struct r600_context *ctx);
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit);
-void evergreen_dma_copy(struct r600_context *rctx,
-               struct pipe_resource *dst,
-               struct pipe_resource *src,
-               uint64_t dst_offset,
-               uint64_t src_offset,
-               uint64_t size);
-boolean evergreen_dma_blit(struct pipe_context *ctx,
-                       struct pipe_resource *dst,
-                       unsigned dst_level,
-                       unsigned dst_x, unsigned dst_y, unsigned dst_z,
-                       struct pipe_resource *src,
-                       unsigned src_level,
-                       const struct pipe_box *src_box);
+void evergreen_dma_copy_buffer(struct r600_context *rctx,
+                              struct pipe_resource *dst,
+                              struct pipe_resource *src,
+                              uint64_t dst_offset,
+                              uint64_t src_offset,
+                              uint64_t size);
 
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
@@ -871,10 +647,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
 void r600_sampler_states_dirty(struct r600_context *rctx,
                               struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
-void r600_streamout_buffers_dirty(struct r600_context *rctx);
-void r600_draw_rectangle(struct blitter_context *blitter,
-                        int x1, int y1, int x2, int y2, float depth,
-                        enum blitter_attrib_type type, const union pipe_color_union *attrib);
+void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
@@ -882,6 +655,25 @@ unsigned r600_tex_filter(unsigned filter);
 unsigned r600_tex_mipfilter(unsigned filter);
 unsigned r600_tex_compare(unsigned compare);
 bool sampler_state_needs_border_color(const struct pipe_sampler_state *state);
+struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
+                                               struct pipe_resource *texture,
+                                               const struct pipe_surface *templ,
+                                               unsigned width, unsigned height);
+unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
+                                  const unsigned char *swizzle_view,
+                                  boolean vtx);
+uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
+                                 const unsigned char *swizzle_view,
+                                 uint32_t *word4_p, uint32_t *yuv_format_p);
+uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format);
+uint32_t r600_colorformat_endian_swap(uint32_t colorformat);
+
+/* r600_uvd.c */
+struct pipe_video_codec *r600_uvd_create_decoder(struct pipe_context *context,
+                                                  const struct pipe_video_codec *decoder);
+
+struct pipe_video_buffer *r600_video_buffer_create(struct pipe_context *pipe,
+                                                  const struct pipe_video_buffer *tmpl);
 
 /*
  * Helpers for building command buffers
@@ -1006,60 +798,6 @@ static INLINE void eg_store_loop_const(struct r600_command_buffer *cb, unsigned
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw);
 void r600_release_command_buffer(struct r600_command_buffer *cb);
 
-/*
- * Helpers for emitting state into a command stream directly.
- */
-static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx,
-                                            struct r600_ring *ring,
-                                            struct r600_resource *rbo,
-                                            enum radeon_bo_usage usage)
-{
-       assert(usage);
-       /* make sure that all previous ring use are flushed so everything
-        * look serialized from driver pov
-        */
-       if (!ring->flushing) {
-               if (ring == &ctx->rings.gfx) {
-                       if (ctx->rings.dma.cs) {
-                               /* flush dma ring */
-                               ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
-                       }
-               } else {
-                       /* flush gfx ring */
-                       ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
-               }
-       }
-       return ctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, rbo->domains) * 4;
-}
-
-static INLINE void r600_write_value(struct radeon_winsys_cs *cs, unsigned value)
-{
-       cs->buf[cs->cdw++] = value;
-}
-
-static INLINE void r600_write_array(struct radeon_winsys_cs *cs, unsigned num, unsigned *ptr)
-{
-       assert(cs->cdw+num <= RADEON_MAX_CMDBUF_DWORDS);
-       memcpy(&cs->buf[cs->cdw], ptr, num * sizeof(ptr[0]));
-       cs->cdw += num;
-}
-
-static INLINE void r600_write_config_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
-{
-       assert(reg < R600_CONTEXT_REG_OFFSET);
-       assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
-       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, num, 0);
-       cs->buf[cs->cdw++] = (reg - R600_CONFIG_REG_OFFSET) >> 2;
-}
-
-static INLINE void r600_write_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
-{
-       assert(reg >= R600_CONTEXT_REG_OFFSET && reg < R600_CTL_CONST_OFFSET);
-       assert(cs->cdw+2+num <= RADEON_MAX_CMDBUF_DWORDS);
-       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, num, 0);
-       cs->buf[cs->cdw++] = (reg - R600_CONTEXT_REG_OFFSET) >> 2;
-}
-
 static INLINE void r600_write_compute_context_reg_seq(struct radeon_winsys_cs *cs, unsigned reg, unsigned num)
 {
        r600_write_context_reg_seq(cs, reg, num);
@@ -1075,28 +813,25 @@ static INLINE void r600_write_ctl_const_seq(struct radeon_winsys_cs *cs, unsigne
        cs->buf[cs->cdw++] = (reg - R600_CTL_CONST_OFFSET) >> 2;
 }
 
-static INLINE void r600_write_config_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
-{
-       r600_write_config_reg_seq(cs, reg, 1);
-       r600_write_value(cs, value);
-}
-
-static INLINE void r600_write_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
-       r600_write_context_reg_seq(cs, reg, 1);
-       r600_write_value(cs, value);
+       r600_write_compute_context_reg_seq(cs, reg, 1);
+       radeon_emit(cs, value);
 }
 
-static INLINE void r600_write_compute_context_reg(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
+static INLINE void r600_write_context_reg_flag(struct radeon_winsys_cs *cs, unsigned reg, unsigned value, unsigned flag)
 {
-       r600_write_compute_context_reg_seq(cs, reg, 1);
-       r600_write_value(cs, value);
+       if (flag & RADEON_CP_PACKET3_COMPUTE_MODE) {
+               r600_write_compute_context_reg(cs, reg, value);
+       } else {
+               r600_write_context_reg(cs, reg, value);
+       }
 }
 
 static INLINE void r600_write_ctl_const(struct radeon_winsys_cs *cs, unsigned reg, unsigned value)
 {
        r600_write_ctl_const_seq(cs, reg, 1);
-       r600_write_value(cs, value);
+       radeon_emit(cs, value);
 }
 
 /*
@@ -1108,15 +843,6 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
 }
 #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
 
-static inline unsigned r600_tex_aniso_filter(unsigned filter)
-{
-       if (filter <= 1)   return 0;
-       if (filter <= 2)   return 1;
-       if (filter <= 4)   return 2;
-       if (filter <= 8)   return 3;
-        /* else */        return 4;
-}
-
 /* 12.4 fixed-point */
 static INLINE unsigned r600_pack_float_12p4(float x)
 {
@@ -1124,36 +850,40 @@ static INLINE unsigned r600_pack_float_12p4(float x)
               x >= 4096 ? 0xffff : x * 16;
 }
 
-static INLINE uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource)
-{
-       struct r600_screen *rscreen = (struct r600_screen*)screen;
-       struct r600_resource *rresource = (struct r600_resource*)resource;
-
-       return rscreen->ws->buffer_get_virtual_address(rresource->cs_buf);
-}
-
-static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
-{
-       struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_resource *rr = (struct r600_resource *)r;
-
-       if (r == NULL) {
-               return;
-       }
-
-       /*
-        * The idea is to compute a gross estimate of memory requirement of
-        * each draw call. After each draw call, memory will be precisely
-        * accounted. So the uncertainty is only on the current draw call.
-        * In practice this gave very good estimate (+/- 10% of the target
-        * memory limit).
-        */
-       if (rr->domains & RADEON_DOMAIN_GTT) {
-               rctx->gtt += rr->buf->size;
-       }
-       if (rr->domains & RADEON_DOMAIN_VRAM) {
-               rctx->vram += rr->buf->size;
-       }
+/* Tell whether the depth format of rtex can be read directly, i.e. without
+ * the DB->CB copy, on r6xx-r7xx.
+ *
+ * Only textures with nr_samples <= 1 and a format of Z16_UNORM or Z32_FLOAT
+ * qualify; everything else returns false.
+ */
+static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+{
+       /* Multisampled depth is never directly readable. */
+       if (rtex->resource.b.b.nr_samples > 1) {
+               return false;
+       }
+
+       return rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
+              rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT;
+}
+
+/* Output primitive type encodings returned by r600_conv_prim_to_gs_out().
+ * NOTE(review): the V_028A6C prefix suggests these are values for a field of
+ * the register at offset 0x028A6C - confirm against the register headers.
+ */
+#define     V_028A6C_OUTPRIM_TYPE_POINTLIST            0
+#define     V_028A6C_OUTPRIM_TYPE_LINESTRIP            1
+#define     V_028A6C_OUTPRIM_TYPE_TRISTRIP             2
+
+/* Look up the V_028A6C_OUTPRIM_TYPE_* value for primitive mode "mode".
+ *
+ * "mode" indexes a fixed 15-entry table (indices 0..14); every entry is one
+ * of POINTLIST, LINESTRIP or TRISTRIP. The assert checks "mode" against
+ * Elements(prim_conv) before the lookup.
+ *
+ * NOTE(review): the table order presumably follows the PIPE_PRIM_* enum -
+ * confirm against p_defines.h before adding or reordering entries.
+ */
+static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+{
+       /* Position matters: entry N is the result for mode == N. */
+       static const int prim_conv[] = {
+               V_028A6C_OUTPRIM_TYPE_POINTLIST, /* mode 0 */
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* mode 1 */
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* mode 2 */
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* mode 3 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 4 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 5 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 6 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 7 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 8 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 9 */
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* mode 10 */
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP, /* mode 11 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 12 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP, /* mode 13 */
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP /* mode 14 */
+       };
+       assert(mode < Elements(prim_conv));
+
+       return prim_conv[mode];
 }
 
 #endif