vc4: Add support for 16-bit signed/unsigned norm/scaled vertex attrs.
[mesa.git] / src / gallium / drivers / r600 / r600_pipe.h
index 45512639b52212102810c287542b0e2a0532033a..46b0a2de07d010a0c2321bc8f0cd0aececd04143 100644 (file)
 #ifndef R600_PIPE_H
 #define R600_PIPE_H
 
-#include "../radeon/r600_pipe_common.h"
-#include "../radeon/r600_cs.h"
+#include "radeon/r600_pipe_common.h"
+#include "radeon/r600_cs.h"
 
 #include "r600_llvm.h"
 #include "r600_public.h"
-#include "r600_resource.h"
 
-#include "util/u_blitter.h"
-#include "util/u_slab.h"
 #include "util/u_suballoc.h"
 #include "util/u_double_list.h"
 #include "util/u_transfer.h"
 
-#define R600_NUM_ATOMS 41
+#define R600_NUM_ATOMS 73
 
 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS       16
-#define R600_MAX_DRAW_CS_DWORDS                34
+#define R600_MAX_DRAW_CS_DWORDS                40
 #define R600_TRACE_CS_DWORDS           7
 
 #define R600_MAX_USER_CONST_BUFFERS 13
 
 /* start driver buffers after user buffers */
 #define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
-#define R600_TXQ_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
-#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1)
+#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2)
+/* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit
+ * of 16 const buffers.
+ * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id.
+ *
+ * In order to support d3d 11 mandated minimum of 15 user const buffers
+ * we'd have to squash all use cases into one driver buffer.
+ */
+#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS)
 
-#define R600_MAX_CONST_BUFFER_SIZE 4096
+#define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4]))
 
 #ifdef PIPE_ARCH_BIG_ENDIAN
 #define R600_BIG_ENDIAN 1
 #define R600_BIG_ENDIAN 0
 #endif
 
-#define R600_MAP_BUFFER_ALIGNMENT 64
-
-#define R600_QUERY_DRAW_CALLS          (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM      (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT       (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME    (PIPE_QUERY_DRIVER_SPECIFIC + 3)
-
 struct r600_context;
 struct r600_bytecode;
 struct r600_shader_key;
@@ -116,6 +114,7 @@ struct r600_clip_misc_state {
        unsigned pa_cl_vs_out_cntl; /* from vertex shader */
        unsigned clip_plane_enable; /* from rasterizer    */
        unsigned clip_dist_write;   /* from vertex shader */
+       boolean clip_disable;       /* from vertex shader */
 };
 
 struct r600_alphatest_state {
@@ -146,6 +145,7 @@ struct r600_clip_state {
 struct r600_cs_shader_state {
        struct r600_atom atom;
        unsigned kernel_index;
+       unsigned pc;
        struct r600_pipe_compute *shader;
 };
 
@@ -167,6 +167,7 @@ struct r600_sample_mask {
 struct r600_config_state {
        struct r600_atom atom;
        unsigned sq_gpr_resource_mgmt_1;
+       unsigned sq_gpr_resource_mgmt_2;
 };
 
 struct r600_stencil_ref
@@ -185,13 +186,25 @@ struct r600_stencil_ref_state {
 struct r600_viewport_state {
        struct r600_atom atom;
        struct pipe_viewport_state state;
+       int idx;
+};
+
+struct r600_shader_stages_state {
+       struct r600_atom atom;
+       unsigned geom_enable;
+};
+
+struct r600_gs_rings_state {
+       struct r600_atom atom;
+       unsigned enable;
+       struct pipe_constant_buffer esgs_ring;
+       struct pipe_constant_buffer gsvs_ring;
 };
 
 /* This must start from 16. */
 /* features */
-#define DBG_NO_LLVM            (1 << 17)
-#define DBG_NO_CP_DMA          (1 << 18)
-#define DBG_NO_ASYNC_DMA       (1 << 19)
+#define DBG_LLVM               (1 << 29)
+#define DBG_NO_CP_DMA          (1 << 30)
 /* shader backend */
 #define DBG_NO_SB              (1 << 21)
 #define DBG_SB_CS              (1 << 22)
@@ -212,13 +225,11 @@ struct r600_screen {
         * XXX: Not sure if this is the best place for global_pool.  Also,
         * it's not thread safe, so it won't work with multiple contexts. */
        struct compute_memory_pool *global_pool;
-       struct r600_resource            *trace_bo;
-       uint32_t                        *trace_ptr;
-       unsigned                        cs_count;
 };
 
 struct r600_pipe_sampler_view {
        struct pipe_sampler_view        base;
+       struct list_head                list;
        struct r600_resource            *tex_resource;
        uint32_t                        tex_resource_words[8];
        bool                            skip_mip_address_reloc;
@@ -232,6 +243,7 @@ struct r600_rasterizer_state {
        unsigned                        clip_plane_enable;
        unsigned                        pa_sc_line_stipple;
        unsigned                        pa_cl_clip_cntl;
+       unsigned                        pa_su_sc_mode_cntl;
        float                           offset_units;
        float                           offset_scale;
        bool                            offset_enable;
@@ -303,7 +315,6 @@ struct r600_samplerview_state {
        uint32_t                        dirty_mask;
        uint32_t                        compressed_depthtex_mask; /* which textures are depth */
        uint32_t                        compressed_colortex_mask;
-       boolean                         dirty_txq_constants;
        boolean                         dirty_buffer_constants;
 };
 
@@ -355,6 +366,7 @@ struct r600_scissor_state
        struct r600_atom                atom;
        struct pipe_scissor_state       scissor;
        bool                            enable; /* r6xx only */
+       int idx;
 };
 
 struct r600_fetch_shader {
@@ -364,53 +376,20 @@ struct r600_fetch_shader {
 
 struct r600_shader_state {
        struct r600_atom                atom;
-       struct r600_pipe_shader_selector *shader;
-};
-
-struct r600_query_buffer {
-       /* The buffer where query results are stored. */
-       struct r600_resource                    *buf;
-       /* Offset of the next free result after current query data */
-       unsigned                                results_end;
-       /* If a query buffer is full, a new buffer is created and the old one
-        * is put in here. When we calculate the result, we sum up the samples
-        * from all buffers. */
-       struct r600_query_buffer                *previous;
-};
-
-struct r600_query {
-       /* The query buffer and how many results are in it. */
-       struct r600_query_buffer                buffer;
-       /* The type of query */
-       unsigned                                type;
-       /* Size of the result in memory for both begin_query and end_query,
-        * this can be one or two numbers, or it could even be a size of a structure. */
-       unsigned                                result_size;
-       /* The number of dwords for begin_query or end_query. */
-       unsigned                                num_cs_dw;
-       /* linked list of queries */
-       struct list_head                        list;
-       /* for custom non-GPU queries */
-       uint64_t begin_result;
-       uint64_t end_result;
+       struct r600_pipe_shader *shader;
 };
 
 struct r600_context {
        struct r600_common_context      b;
        struct r600_screen              *screen;
        struct blitter_context          *blitter;
-       struct u_upload_mgr             *uploader;
        struct u_suballocator           *allocator_fetch_shader;
-       struct util_slab_mempool        pool_transfers;
-       unsigned                        initial_gfx_cs_size;
 
        /* Hardware info. */
        boolean                         has_vertex_cache;
        boolean                         keep_tiling_flags;
        unsigned                        default_ps_gprs, default_vs_gprs;
        unsigned                        r6xx_num_clause_temp_gprs;
-       unsigned                        backend_mask;
-       unsigned                        max_db; /* for OQ */
 
        /* Miscellaneous state objects. */
        void                            *custom_dsa_flush;
@@ -448,17 +427,21 @@ struct r600_context {
        struct r600_poly_offset_state   poly_offset_state;
        struct r600_cso_state           rasterizer_state;
        struct r600_sample_mask         sample_mask;
-       struct r600_scissor_state       scissor;
+       struct r600_scissor_state       scissor[16];
        struct r600_seamless_cube_map   seamless_cube_map;
        struct r600_config_state        config_state;
        struct r600_stencil_ref_state   stencil_ref;
        struct r600_vgt_state           vgt_state;
-       struct r600_viewport_state      viewport;
+       struct r600_viewport_state      viewport[16];
        /* Shaders and shader resources. */
        struct r600_cso_state           vertex_fetch_shader;
        struct r600_shader_state        vertex_shader;
        struct r600_shader_state        pixel_shader;
+       struct r600_shader_state        geometry_shader;
+       struct r600_shader_state        export_shader;
        struct r600_cs_shader_state     cs_shader_state;
+       struct r600_shader_stages_state shader_stages;
+       struct r600_gs_rings_state      gs_rings;
        struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];
        struct r600_textures_info       samplers[PIPE_SHADER_TYPES];
        /** Vertex buffers for fetch shaders */
@@ -470,11 +453,13 @@ struct r600_context {
        unsigned                        compute_cb_target_mask;
        struct r600_pipe_shader_selector *ps_shader;
        struct r600_pipe_shader_selector *vs_shader;
+       struct r600_pipe_shader_selector *gs_shader;
        struct r600_rasterizer_state    *rasterizer;
        bool                            alpha_to_one;
        bool                            force_blend_disable;
        boolean                         dual_src_blend;
        unsigned                        zwritemask;
+       int                                     ps_iter_samples;
 
        /* Index buffer. */
        struct pipe_index_buffer        index_buffer;
@@ -483,37 +468,8 @@ struct r600_context {
        int                             last_primitive_type; /* Last primitive type used in draw_vbo. */
        int                             last_start_instance;
 
-       /* Queries. */
-       /* The list of active queries. Only one query of each type can be active. */
-       int                             num_occlusion_queries;
-       int                             num_pipelinestat_queries;
-       /* Keep track of non-timer queries, because they should be suspended
-        * during context flushing.
-        * The timer queries (TIME_ELAPSED) shouldn't be suspended. */
-       struct list_head                active_nontimer_queries;
-       unsigned                        num_cs_dw_nontimer_queries_suspend;
-       /* If queries have been suspended. */
-       bool                            nontimer_queries_suspended;
-       unsigned                        num_draw_calls;
-
-       /* Render condition. */
-       struct pipe_query               *current_render_cond;
-       unsigned                        current_render_cond_mode;
-       boolean                         current_render_cond_cond;
-       boolean                         predicate_drawing;
-
        void                            *sb_context;
        struct r600_isa         *isa;
-
-       /* Work-around for flushing problems with compute shaders on Cayman:
-        * Emitting a SURFACE_SYNC packet with any of the CB*_DEST_BASE_ENA
-        * or DB_DEST_BASE_ENA bits set after dispatching a compute shader
-        * hangs the GPU.
-        *
-        * Setting this to true will prevent r600_flush_emit() from emitting
-        * a SURFACE_SYNC packet.  This field will be cleared by
-        * by r600_context_flush() after flushing the command stream. */
-       boolean                         skip_surface_sync_on_next_cs_flush;
 };
 
 static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
@@ -530,7 +486,7 @@ static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *a
 {
        atom->emit(&rctx->b, atom);
        atom->dirty = false;
-       if (rctx->screen->trace_bo) {
+       if (rctx->screen->b.trace_bo) {
                r600_trace_emit(rctx);
        }
 }
@@ -565,7 +521,8 @@ struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     const struct pipe_sampler_view *state,
-                                    unsigned width0, unsigned height0);
+                                    unsigned width0, unsigned height0,
+                                    unsigned force_level);
 void evergreen_init_common_regs(struct r600_command_buffer *cb,
                                enum chip_class ctx_chip_class,
                                enum radeon_family ctx_family,
@@ -578,6 +535,8 @@ void cayman_init_common_regs(struct r600_command_buffer *cb,
 void evergreen_init_state_functions(struct r600_context *rctx);
 void evergreen_init_atom_start_cs(struct r600_context *rctx);
 void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx);
 void *evergreen_create_resolve_blend(struct r600_context *rctx);
@@ -600,23 +559,13 @@ void r600_decompress_depth_textures(struct r600_context *rctx,
                                    struct r600_samplerview_state *textures);
 void r600_decompress_color_textures(struct r600_context *rctx,
                                    struct r600_samplerview_state *textures);
-
-/* r600_buffer.c */
-struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
-                                        const struct pipe_resource *templ,
-                                        unsigned alignment);
-
-/* r600_pipe.c */
-const char * r600_llvm_gpu_string(enum radeon_family family);
-
-
-/* r600_query.c */
-void r600_init_query_functions(struct r600_context *rctx);
-void r600_suspend_nontimer_queries(struct r600_context *ctx);
-void r600_resume_nontimer_queries(struct r600_context *ctx);
-
-/* r600_resource.c */
-void r600_init_context_resource_functions(struct r600_context *r600);
+void r600_resource_copy_region(struct pipe_context *ctx,
+                              struct pipe_resource *dst,
+                              unsigned dst_level,
+                              unsigned dstx, unsigned dsty, unsigned dstz,
+                              struct pipe_resource *src,
+                              unsigned src_level,
+                              const struct pipe_box *src_box);
 
 /* r600_shader.c */
 int r600_pipe_shader_create(struct pipe_context *ctx,
@@ -634,6 +583,8 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
 void r600_init_state_functions(struct r600_context *rctx);
 void r600_init_atom_start_cs(struct r600_context *rctx);
 void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
+void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader);
 void *r600_create_db_flush_dsa(struct r600_context *rctx);
 void *r600_create_resolve_blend(struct r600_context *rctx);
@@ -648,12 +599,11 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 void r600_update_db_shader_control(struct r600_context * rctx);
 
 /* r600_hw_context.c */
-void r600_get_backend_mask(struct r600_context *ctx);
-void r600_context_flush(struct r600_context *ctx, unsigned flags);
+void r600_context_gfx_flush(void *context, unsigned flags,
+                           struct pipe_fence_handle **fence);
 void r600_begin_new_cs(struct r600_context *ctx);
 void r600_flush_emit(struct r600_context *ctx);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
-void r600_need_dma_space(struct r600_context *ctx, unsigned num_dw);
 void r600_cp_dma_copy_buffer(struct r600_context *rctx,
                             struct pipe_resource *dst, uint64_t dst_offset,
                             struct pipe_resource *src, uint64_t src_offset,
@@ -661,22 +611,22 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
                                   struct pipe_resource *dst, uint64_t offset,
                                   unsigned size, uint32_t clear_value);
-void r600_dma_copy(struct r600_context *rctx,
-               struct pipe_resource *dst,
-               struct pipe_resource *src,
-               uint64_t dst_offset,
-               uint64_t src_offset,
-               uint64_t size);
+void r600_dma_copy_buffer(struct r600_context *rctx,
+                         struct pipe_resource *dst,
+                         struct pipe_resource *src,
+                         uint64_t dst_offset,
+                         uint64_t src_offset,
+                         uint64_t size);
 
 /*
  * evergreen_hw_context.c
  */
-void evergreen_dma_copy(struct r600_context *rctx,
-               struct pipe_resource *dst,
-               struct pipe_resource *src,
-               uint64_t dst_offset,
-               uint64_t src_offset,
-               uint64_t size);
+void evergreen_dma_copy_buffer(struct r600_context *rctx,
+                              struct pipe_resource *dst,
+                              struct pipe_resource *src,
+                              uint64_t dst_offset,
+                              uint64_t src_offset,
+                              uint64_t size);
 
 /* r600_state_common.c */
 void r600_init_common_state_functions(struct r600_context *rctx);
@@ -697,9 +647,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
 void r600_sampler_states_dirty(struct r600_context *rctx,
                               struct r600_sampler_states *state);
 void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
-void r600_draw_rectangle(struct blitter_context *blitter,
-                        int x1, int y1, int x2, int y2, float depth,
-                        enum blitter_attrib_type type, const union pipe_color_union *attrib);
+void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
 uint32_t r600_translate_stencil_op(int s_op);
 uint32_t r600_translate_fill(uint32_t func);
 unsigned r600_tex_wrap(unsigned wrap);
@@ -717,7 +665,8 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
 uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
                                  const unsigned char *swizzle_view,
                                  uint32_t *word4_p, uint32_t *yuv_format_p);
-void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf);
+uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format);
+uint32_t r600_colorformat_endian_swap(uint32_t colorformat);
 
 /* r600_uvd.c */
 struct pipe_video_codec *r600_uvd_create_decoder(struct pipe_context *context,
@@ -894,15 +843,6 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
 }
 #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
 
-static inline unsigned r600_tex_aniso_filter(unsigned filter)
-{
-       if (filter <= 1)   return 0;
-       if (filter <= 2)   return 1;
-       if (filter <= 4)   return 2;
-       if (filter <= 8)   return 3;
-        /* else */        return 4;
-}
-
 /* 12.4 fixed-point */
 static INLINE unsigned r600_pack_float_12p4(float x)
 {
@@ -910,4 +850,40 @@ static INLINE unsigned r600_pack_float_12p4(float x)
               x >= 4096 ? 0xffff : x * 16;
 }
 
+/* Return if the depth format can be read without the DB->CB copy on r6xx-r7xx. */
+static INLINE bool r600_can_read_depth(struct r600_texture *rtex)
+{
+       return rtex->resource.b.b.nr_samples <= 1 &&
+              (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
+               rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT);
+}
+
+#define     V_028A6C_OUTPRIM_TYPE_POINTLIST            0
+#define     V_028A6C_OUTPRIM_TYPE_LINESTRIP            1
+#define     V_028A6C_OUTPRIM_TYPE_TRISTRIP             2
+
+static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+{
+       static const int prim_conv[] = {
+               V_028A6C_OUTPRIM_TYPE_POINTLIST,
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+               V_028A6C_OUTPRIM_TYPE_TRISTRIP
+       };
+       assert(mode < Elements(prim_conv));
+
+       return prim_conv[mode];
+}
+
 #endif