i965/gen7.5: Fix lower bound on number of VS URB entries.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.h
index 1ceb40f67b466b4568eb06bda829135329630445..57f086bdb5c99693d0579e8e56441a311f167455 100644 (file)
@@ -125,19 +125,19 @@ extern "C" {
 struct brw_context;
 struct brw_instruction;
 struct brw_vs_prog_key;
+struct brw_vec4_prog_key;
 struct brw_wm_prog_key;
 struct brw_wm_prog_data;
 
 enum brw_state_id {
    BRW_STATE_URB_FENCE,
    BRW_STATE_FRAGMENT_PROGRAM,
+   BRW_STATE_GEOMETRY_PROGRAM,
    BRW_STATE_VERTEX_PROGRAM,
-   BRW_STATE_INPUT_DIMENSIONS,
    BRW_STATE_CURBE_OFFSETS,
    BRW_STATE_REDUCED_PRIMITIVE,
    BRW_STATE_PRIMITIVE,
    BRW_STATE_CONTEXT,
-   BRW_STATE_WM_INPUT_DIMENSIONS,
    BRW_STATE_PSP,
    BRW_STATE_SURFACES,
    BRW_STATE_VS_BINDING_TABLE,
@@ -146,24 +146,31 @@ enum brw_state_id {
    BRW_STATE_INDICES,
    BRW_STATE_VERTICES,
    BRW_STATE_BATCH,
-   BRW_STATE_NR_WM_SURFACES,
-   BRW_STATE_NR_VS_SURFACES,
    BRW_STATE_INDEX_BUFFER,
    BRW_STATE_VS_CONSTBUF,
+   BRW_STATE_GS_CONSTBUF,
    BRW_STATE_PROGRAM_CACHE,
    BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_SOL_INDICES,
+   BRW_STATE_VUE_MAP_VS,
+   BRW_STATE_VUE_MAP_GEOM_OUT,
+   BRW_STATE_TRANSFORM_FEEDBACK,
+   BRW_STATE_RASTERIZER_DISCARD,
+   BRW_STATE_STATS_WM,
+   BRW_STATE_UNIFORM_BUFFER,
+   BRW_STATE_META_IN_PROGRESS,
+   BRW_STATE_INTERPOLATION_MAP,
+   BRW_STATE_PUSH_CONSTANT_ALLOCATION,
+   BRW_NUM_STATE_BITS
 };
 
 #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
 #define BRW_NEW_FRAGMENT_PROGRAM        (1 << BRW_STATE_FRAGMENT_PROGRAM)
+#define BRW_NEW_GEOMETRY_PROGRAM        (1 << BRW_STATE_GEOMETRY_PROGRAM)
 #define BRW_NEW_VERTEX_PROGRAM          (1 << BRW_STATE_VERTEX_PROGRAM)
-#define BRW_NEW_INPUT_DIMENSIONS        (1 << BRW_STATE_INPUT_DIMENSIONS)
 #define BRW_NEW_CURBE_OFFSETS           (1 << BRW_STATE_CURBE_OFFSETS)
 #define BRW_NEW_REDUCED_PRIMITIVE       (1 << BRW_STATE_REDUCED_PRIMITIVE)
 #define BRW_NEW_PRIMITIVE               (1 << BRW_STATE_PRIMITIVE)
 #define BRW_NEW_CONTEXT                 (1 << BRW_STATE_CONTEXT)
-#define BRW_NEW_WM_INPUT_DIMENSIONS     (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
 #define BRW_NEW_PSP                     (1 << BRW_STATE_PSP)
 #define BRW_NEW_SURFACES               (1 << BRW_STATE_SURFACES)
 #define BRW_NEW_VS_BINDING_TABLE       (1 << BRW_STATE_VS_BINDING_TABLE)
@@ -179,9 +186,18 @@ enum brw_state_id {
 /** \see brw.state.depth_region */
 #define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_GS_CONSTBUF            (1 << BRW_STATE_GS_CONSTBUF)
 #define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS     (1 << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_SOL_INDICES            (1 << BRW_STATE_SOL_INDICES)
+#define BRW_NEW_VUE_MAP_VS             (1 << BRW_STATE_VUE_MAP_VS)
+#define BRW_NEW_VUE_MAP_GEOM_OUT       (1 << BRW_STATE_VUE_MAP_GEOM_OUT)
+#define BRW_NEW_TRANSFORM_FEEDBACK     (1 << BRW_STATE_TRANSFORM_FEEDBACK)
+#define BRW_NEW_RASTERIZER_DISCARD     (1 << BRW_STATE_RASTERIZER_DISCARD)
+#define BRW_NEW_STATS_WM               (1 << BRW_STATE_STATS_WM)
+#define BRW_NEW_UNIFORM_BUFFER          (1 << BRW_STATE_UNIFORM_BUFFER)
+#define BRW_NEW_META_IN_PROGRESS        (1 << BRW_STATE_META_IN_PROGRESS)
+#define BRW_NEW_INTERPOLATION_MAP       (1 << BRW_STATE_INTERPOLATION_MAP)
+#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -266,6 +282,13 @@ struct brw_vertex_program {
 };
 
 
+/** Subclass of Mesa geometry program */
+struct brw_geometry_program {
+   struct gl_geometry_program program;
+   unsigned id;  /**< serial no. to identify geom progs, never re-used */
+};
+
+
 /** Subclass of Mesa fragment program */
 struct brw_fragment_program {
    struct gl_fragment_program program;
@@ -299,6 +322,8 @@ struct brw_wm_prog_data {
    GLuint reg_blocks_16;
    GLuint total_scratch;
 
+   unsigned binding_table_size;
+
    GLuint nr_params;       /**< number of float params/constants */
    GLuint nr_pull_params;
    bool dual_src_blend;
@@ -329,7 +354,6 @@ struct brw_wm_prog_data {
 typedef enum
 {
    BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
-   BRW_VARYING_SLOT_POS_DUPLICATE,
    BRW_VARYING_SLOT_PAD,
    /**
     * Technically this is not a varying but just a placeholder that
@@ -354,13 +378,20 @@ typedef enum
  * it contains data for two separate vertices.
  */
 struct brw_vue_map {
+   /**
+    * Bitfield representing all varying slots that are (a) stored in this VUE
+    * map, and (b) actually written by the shader.  Does not include any of
+    * the additional varying slots defined in brw_varying_slot.
+    */
+   GLbitfield64 slots_valid;
+
    /**
     * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
     * not stored in a slot (because they are not written, or because
     * additional processing is applied before storing them in the VUE), the
     * value is -1.
     */
-   int varying_to_slot[BRW_VARYING_SLOT_COUNT];
+   signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
 
    /**
     * Map from VUE slot to gl_varying_slot value.  For slots that do not
@@ -371,7 +402,7 @@ struct brw_vue_map {
     * simplifies code that uses the value stored in slot_to_varying to
     * create a bit mask).
     */
-   int slot_to_varying[BRW_VARYING_SLOT_COUNT];
+   signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
 
    /**
     * Total number of VUE slots in use
@@ -397,6 +428,35 @@ static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
    return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
 }
 
+void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
+                         GLbitfield64 slots_valid, bool userclip_active);
+
+
+/*
+ * Mapping of VUE map slots to interpolation modes.
+ */
+struct interpolation_mode_map {
+   unsigned char mode[BRW_VARYING_SLOT_COUNT];
+};
+
+static inline bool brw_any_flat_varyings(struct interpolation_mode_map *map)
+{
+   for (int i = 0; i < BRW_VARYING_SLOT_COUNT; i++)
+      if (map->mode[i] == INTERP_QUALIFIER_FLAT)
+         return true;
+
+   return false;
+}
+
+static inline bool brw_any_noperspective_varyings(struct interpolation_mode_map *map)
+{
+   for (int i = 0; i < BRW_VARYING_SLOT_COUNT; i++)
+      if (map->mode[i] == INTERP_QUALIFIER_NOPERSPECTIVE)
+         return true;
+
+   return false;
+}
+
 
 struct brw_sf_prog_data {
    GLuint urb_read_length;
@@ -418,7 +478,7 @@ struct brw_clip_prog_data {
    GLuint total_grf;
 };
 
-struct brw_gs_prog_data {
+struct brw_ff_gs_prog_data {
    GLuint urb_read_length;
    GLuint total_grf;
 
@@ -429,35 +489,67 @@ struct brw_gs_prog_data {
    unsigned svbi_postincrement_value;
 };
 
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
+
+/* Note: brw_vec4_prog_data_compare() must be updated when adding fields to
+ * this struct!
  */
-struct brw_vs_prog_data {
+struct brw_vec4_prog_data {
    struct brw_vue_map vue_map;
 
+   /**
+    * Register where the thread expects to find input data from the URB
+    * (typically uniforms, followed by per-vertex inputs).
+    */
+   unsigned dispatch_grf_start_reg;
+
    GLuint curb_read_length;
    GLuint urb_read_length;
    GLuint total_grf;
-   GLbitfield64 outputs_written;
    GLuint nr_params;       /**< number of float params/constants */
    GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */
    GLuint total_scratch;
 
-   GLbitfield64 inputs_read;
-
-   /* Used for calculating urb partitions:
+   /* Used for calculating urb partitions.  In the VS, this is the size of the
+    * URB entry used for both input and output to the thread.  In the GS, this
+    * is the size of the URB entry used for output.
     */
    GLuint urb_entry_size;
 
-   bool uses_vertexid;
+   unsigned binding_table_size;
 
-   int num_surfaces;
-
-   /* These pointers must appear last.  See brw_vs_prog_data_compare(). */
+   /* These pointers must appear last.  See brw_vec4_prog_data_compare(). */
    const float **param;
    const float **pull_param;
 };
 
+
+/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
+ * struct!
+ */
+struct brw_vs_prog_data {
+   struct brw_vec4_prog_data base;
+
+   GLbitfield64 inputs_read;
+
+   bool uses_vertexid;
+};
+
+
+/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
+ * this struct!
+ */
+struct brw_gs_prog_data
+{
+   struct brw_vec4_prog_data base;
+
+   /**
+    * Size of an output vertex, measured in HWORDS (32 bytes).
+    */
+   unsigned output_vertex_size_hwords;
+
+   unsigned output_topology;
+};
+
 /** Number of texture sampler units */
 #define BRW_MAX_TEX_UNIT 16
 
@@ -527,10 +619,10 @@ struct brw_vs_prog_data {
  *    |  36 | UBO 11                  |
  *    +-------------------------------+
  *
- * Our VS binding tables are programmed as follows:
+ * Our VS (and Gen7 GS) binding tables are programmed as follows:
  *
  *    +-----+-------------------------+
- *    |   0 | VS Pull Constant Buffer |
+ *    |   0 | Pull Constant Buffer    |
  *    +-----+-------------------------+
  *    |   1 | Texture 0               |
  *    |   . |     .                   |
@@ -551,10 +643,6 @@ struct brw_vs_prog_data {
  *    |   : |     :                   |
  *    |  63 | SOL Binding 63          |
  *    +-----+-------------------------+
- *
- * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
- * the identity function or things will break.  We do want to keep draw buffers
- * first so we can use headerless render target writes for RT 0.
  */
 #define SURF_INDEX_DRAW(d)           (d)
 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
@@ -564,14 +652,14 @@ struct brw_vs_prog_data {
 /** Maximum size of the binding table. */
 #define BRW_MAX_WM_SURFACES          (SURF_INDEX_WM_SHADER_TIME + 1)
 
-#define SURF_INDEX_VERT_CONST_BUFFER (0)
-#define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
-#define SURF_INDEX_VS_UBO(u)         (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u)
-#define SURF_INDEX_VS_SHADER_TIME    (SURF_INDEX_VS_UBO(12))
-#define BRW_MAX_VS_SURFACES          (SURF_INDEX_VS_SHADER_TIME + 1)
+#define SURF_INDEX_VEC4_CONST_BUFFER (0)
+#define SURF_INDEX_VEC4_TEXTURE(t)   (SURF_INDEX_VEC4_CONST_BUFFER + 1 + (t))
+#define SURF_INDEX_VEC4_UBO(u)       (SURF_INDEX_VEC4_TEXTURE(BRW_MAX_TEX_UNIT) + u)
+#define SURF_INDEX_VEC4_SHADER_TIME  (SURF_INDEX_VEC4_UBO(12))
+#define BRW_MAX_VEC4_SURFACES        (SURF_INDEX_VEC4_SHADER_TIME + 1)
 
-#define SURF_INDEX_SOL_BINDING(t)    ((t))
-#define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
+#define BRW_MAX_GEN6_GS_SURFACES       SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
 
 /**
  * Stride in bytes between shader_time entries.
@@ -582,13 +670,11 @@ struct brw_vs_prog_data {
 #define SHADER_TIME_STRIDE 64
 
 enum brw_cache_id {
-   BRW_BLEND_STATE,
-   BRW_DEPTH_STENCIL_STATE,
-   BRW_COLOR_CALC_STATE,
    BRW_CC_VP,
    BRW_CC_UNIT,
    BRW_WM_PROG,
    BRW_BLORP_BLIT_PROG,
+   BRW_BLORP_CONST_COLOR_PROG,
    BRW_SAMPLER,
    BRW_WM_UNIT,
    BRW_SF_PROG,
@@ -596,7 +682,8 @@ enum brw_cache_id {
    BRW_SF_UNIT, /* scissor state on gen6 */
    BRW_VS_UNIT,
    BRW_VS_PROG,
-   BRW_GS_UNIT,
+   BRW_FF_GS_UNIT,
+   BRW_FF_GS_PROG,
    BRW_GS_PROG,
    BRW_CLIP_VP,
    BRW_CLIP_UNIT,
@@ -676,9 +763,6 @@ enum shader_time_shader_type {
 
 /* Flags for brw->state.cache.
  */
-#define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
-#define CACHE_NEW_DEPTH_STENCIL_STATE    (1<<BRW_DEPTH_STENCIL_STATE)
-#define CACHE_NEW_COLOR_CALC_STATE       (1<<BRW_COLOR_CALC_STATE)
 #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
 #define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
 #define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
@@ -689,7 +773,8 @@ enum shader_time_shader_type {
 #define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
 #define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
 #define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
-#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
+#define CACHE_NEW_FF_GS_UNIT             (1<<BRW_FF_GS_UNIT)
+#define CACHE_NEW_FF_GS_PROG             (1<<BRW_FF_GS_PROG)
 #define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
 #define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
 #define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
@@ -701,13 +786,6 @@ struct brw_cached_batch_item {
    struct brw_cached_batch_item *next;
 };
    
-
-
-/* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
- * be easier if C allowed arrays of packed elements?
- */
-#define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
-
 struct brw_vertex_buffer {
    /** Buffer object containing the uploaded vertex data */
    drm_intel_bo *bo;
@@ -727,12 +805,6 @@ struct brw_vertex_element {
    unsigned int offset;
 };
 
-
-
-struct brw_vertex_info {
-   GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
-};
-
 struct brw_query_object {
    struct gl_query_object Base;
 
@@ -745,20 +817,175 @@ struct brw_query_object {
 
 
 /**
- * brw_context is derived from intel_context.
+ * Data shared between brw_context::vs and brw_context::gs
+ */
+struct brw_stage_state
+{
+   drm_intel_bo *scratch_bo;
+   drm_intel_bo *const_bo;
+   /** Offset in the program cache to the program */
+   uint32_t prog_offset;
+   uint32_t state_offset;
+
+   uint32_t push_const_offset; /* Offset in the batchbuffer */
+   int push_const_size; /* in 256-bit register increments */
+
+   uint32_t bind_bo_offset;
+   uint32_t surf_offset[BRW_MAX_VEC4_SURFACES];
+
+   /** SAMPLER_STATE count and table offset */
+   uint32_t sampler_count;
+   uint32_t sampler_offset;
+
+   /** Offsets in the batch to sampler default colors (texture border color) */
+   uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
+};
+
+
+/**
+ * brw_context is derived from gl_context.
  */
 struct brw_context 
 {
-   struct intel_context intel;  /**< base class, must be first field */
+   struct gl_context ctx; /**< base class, must be first field */
+
+   struct
+   {
+      void (*destroy) (struct brw_context * brw);
+      void (*finish_batch) (struct brw_context * brw);
+      void (*new_batch) (struct brw_context * brw);
+
+      void (*update_texture_surface)(struct gl_context *ctx,
+                                     unsigned unit,
+                                     uint32_t *surf_offset);
+      void (*update_renderbuffer_surface)(struct brw_context *brw,
+                                         struct gl_renderbuffer *rb,
+                                         bool layered,
+                                         unsigned unit);
+      void (*update_null_renderbuffer_surface)(struct brw_context *brw,
+                                              unsigned unit);
+      void (*create_constant_surface)(struct brw_context *brw,
+                                     drm_intel_bo *bo,
+                                     uint32_t offset,
+                                     uint32_t size,
+                                     uint32_t *out_offset,
+                                      bool dword_pitch);
+
+      /** Upload a SAMPLER_STATE table. */
+      void (*upload_sampler_state_table)(struct brw_context *brw,
+                                         struct gl_program *prog,
+                                         uint32_t sampler_count,
+                                         uint32_t *sst_offset,
+                                         uint32_t *sdc_offset);
+
+      /**
+       * Send the appropriate state packets to configure depth, stencil, and
+       * HiZ buffers (i965+ only)
+       */
+      void (*emit_depth_stencil_hiz)(struct brw_context *brw,
+                                     struct intel_mipmap_tree *depth_mt,
+                                     uint32_t depth_offset,
+                                     uint32_t depthbuffer_format,
+                                     uint32_t depth_surface_type,
+                                     struct intel_mipmap_tree *stencil_mt,
+                                     bool hiz, bool separate_stencil,
+                                     uint32_t width, uint32_t height,
+                                     uint32_t tile_x, uint32_t tile_y);
+
+   } vtbl;
+
+   dri_bufmgr *bufmgr;
+
+   drm_intel_context *hw_ctx;
+
+   struct intel_batchbuffer batch;
+   bool no_batch_wrap;
+
+   struct {
+      drm_intel_bo *bo;
+      GLuint offset;
+      uint32_t buffer_len;
+      uint32_t buffer_offset;
+      char buffer[4096];
+   } upload;
+
+   /**
+    * Set if rendering has occured to the drawable's front buffer.
+    *
+    * This is used in the DRI2 case to detect that glFlush should also copy
+    * the contents of the fake front buffer to the real front buffer.
+    */
+   bool front_buffer_dirty;
+
+   /**
+    * Track whether front-buffer rendering is currently enabled
+    *
+    * A separate flag is used to track this in order to support MRT more
+    * easily.
+    */
+   bool is_front_buffer_rendering;
+
+   /**
+    * Track whether front-buffer is the current read target.
+    *
+    * This is closely associated with is_front_buffer_rendering, but may
+    * be set separately.  The DRI2 fake front buffer must be referenced
+    * either way.
+    */
+   bool is_front_buffer_reading;
+
+   /** Framerate throttling: @{ */
+   drm_intel_bo *first_post_swapbuffers_batch;
+   bool need_throttle;
+   /** @} */
+
+   GLuint stats_wm;
+
+   /**
+    * drirc options:
+    * @{
+    */
+   bool no_rast;
+   bool always_flush_batch;
+   bool always_flush_cache;
+   bool disable_throttling;
+   bool precompile;
+
+   driOptionCache optionCache;
+   /** @} */
+
    GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
 
+   GLenum reduced_primitive;
+
+   /**
+    * Set if we're either a debug context or the INTEL_DEBUG=perf environment
+    * variable is set, this is the flag indicating to do expensive work that
+    * might lead to a perf_debug() call.
+    */
+   bool perf_debug;
+
+   uint32_t max_gtt_map_object_size;
+
    bool emit_state_always;
+
+   int gen;
+   int gt;
+
+   bool is_g4x;
+   bool is_baytrail;
+   bool is_haswell;
+
+   bool has_hiz;
+   bool has_separate_stencil;
+   bool must_use_separate_stencil;
+   bool has_llc;
+   bool has_swizzling;
    bool has_surface_tile_offset;
    bool has_compr4;
    bool has_negative_rhw_bug;
    bool has_aa_line_parameters;
    bool has_pln;
-   bool precompile;
 
    /**
     * Some versions of Gen hardware don't do centroid interpolation correctly
@@ -769,6 +996,7 @@ struct brw_context
     */
    bool needs_unlit_centroid_workaround;
 
+   GLuint NewGLState;
    struct {
       struct brw_state_flags dirty;
    } state;
@@ -776,6 +1004,9 @@ struct brw_context
    struct brw_cache cache;
    struct brw_cached_batch_item *cached_batch_items;
 
+   /* Whether a meta-operation is in progress. */
+   bool meta_in_progress;
+
    struct {
       struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
       struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
@@ -787,7 +1018,6 @@ struct brw_context
       /* Summary of size and varying of active arrays, so we can check
        * for changes to this state:
        */
-      struct brw_vertex_info info;
       unsigned int min_index, max_index;
 
       /* Offset from start of vertex buffer so we can avoid redefining
@@ -818,6 +1048,7 @@ struct brw_context
    /* Active vertex program: 
     */
    const struct gl_vertex_program *vertex_program;
+   const struct gl_geometry_program *geometry_program;
    const struct gl_fragment_program *fragment_program;
 
    /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
@@ -842,6 +1073,7 @@ struct brw_context
 
       bool constrained;
 
+      GLuint min_vs_entries;    /* Minimum number of VS entries */
       GLuint max_vs_entries;   /* Maximum number of VS entries */
       GLuint max_gs_entries;   /* Maximum number of GS entries */
 
@@ -851,15 +1083,6 @@ struct brw_context
       GLuint nr_sf_entries;
       GLuint nr_cs_entries;
 
-      /* gen6:
-       * The length of each URB entry owned by the VS (or GS), as
-       * a number of 1024-bit (128-byte) rows.  Should be >= 1.
-       *
-       * gen7: Same meaning, but in 512-bit (64-byte) rows.
-       */
-      GLuint vs_size;
-      GLuint gs_size;
-
       GLuint vs_start;
       GLuint gs_start;
       GLuint clip_start;
@@ -906,26 +1129,27 @@ struct brw_context
       GLuint last_bufsz;
    } curbe;
 
-   /** SAMPLER_STATE count and offset */
-   struct {
-      GLuint count;
-      uint32_t offset;
-   } sampler;
-
-   struct {
-      struct brw_vs_prog_data *prog_data;
-
-      drm_intel_bo *scratch_bo;
-      drm_intel_bo *const_bo;
-      /** Offset in the program cache to the VS program */
-      uint32_t prog_offset;
-      uint32_t state_offset;
-
-      uint32_t push_const_offset; /* Offset in the batchbuffer */
-      int push_const_size; /* in 256-bit register increments */
+   /**
+    * Layout of vertex data exiting the vertex shader.
+    *
+    * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
+    */
+   struct brw_vue_map vue_map_vs;
 
-      /** @{ register allocator */
+   /**
+    * Layout of vertex data exiting the geometry portion of the pipleine.
+    * This comes from the geometry shader if one exists, otherwise from the
+    * vertex shader.
+    *
+    * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
+    */
+   struct brw_vue_map vue_map_geom_out;
 
+   /**
+    * Data structures used by all vec4 program compiles (not specific to any
+    * particular program).
+    */
+   struct {
       struct ra_regs *regs;
 
       /**
@@ -939,14 +1163,20 @@ struct brw_context
        * GRF for that object.
       */
       uint8_t *ra_reg_to_grf;
-      /** @} */
+   } vec4;
 
-      uint32_t bind_bo_offset;
-      uint32_t surf_offset[BRW_MAX_VS_SURFACES];
+   struct {
+      struct brw_stage_state base;
+      struct brw_vs_prog_data *prog_data;
    } vs;
 
    struct {
+      struct brw_stage_state base;
       struct brw_gs_prog_data *prog_data;
+   } gs;
+
+   struct {
+      struct brw_ff_gs_prog_data *prog_data;
 
       bool prog_active;
       /** Offset in the program cache to the CLIP program pre-gen6 */
@@ -954,8 +1184,8 @@ struct brw_context
       uint32_t state_offset;
 
       uint32_t bind_bo_offset;
-      uint32_t surf_offset[BRW_MAX_GS_SURFACES];
-   } gs;
+      uint32_t surf_offset[BRW_MAX_GEN6_GS_SURFACES];
+   } ff_gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
@@ -985,15 +1215,6 @@ struct brw_context
    struct {
       struct brw_wm_prog_data *prog_data;
 
-      /** Input sizes, calculated from active vertex program.
-       * One bit per fragment program input attribute.
-       */
-      GLbitfield64 input_size_masks[4];
-
-      /** offsets in the batch to sampler default colors (texture border color)
-       */
-      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
-
       GLuint render_surf;
 
       drm_intel_bo *scratch_bo;
@@ -1021,6 +1242,14 @@ struct brw_context
       uint32_t bind_bo_offset;
       uint32_t surf_offset[BRW_MAX_WM_SURFACES];
 
+      /** SAMPLER_STATE count and table offset */
+      uint32_t sampler_count;
+      uint32_t sampler_offset;
+
+      /** Offsets in the batch to sampler default colors (texture border color)
+       */
+      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
+
       struct {
          struct ra_regs *regs;
 
@@ -1067,19 +1296,14 @@ struct brw_context
    } *state_batch_list;
    int state_batch_count;
 
-   struct brw_sol_state {
-      uint32_t svbi_0_starting_index;
-      uint32_t svbi_0_max_index;
-      uint32_t offset_0_batch_start;
-      uint32_t primitives_generated;
-      uint32_t primitives_written;
-      bool counting_primitives_generated;
-      bool counting_primitives_written;
-   } sol;
-
    uint32_t render_target_format[MESA_FORMAT_COUNT];
    bool format_supported_as_render_target[MESA_FORMAT_COUNT];
 
+   /* Interpolation modes, one byte per vue slot.
+    * Used Gen4/5 by the clip|sf|wm stages. Ignored on Gen6+.
+    */
+   struct interpolation_mode_map interpolation_mode;
+
    /* PrimitiveRestart */
    struct {
       bool in_progress;
@@ -1093,7 +1317,6 @@ struct brw_context
    struct {
       struct intel_mipmap_tree *depth_mt;
       struct intel_mipmap_tree *stencil_mt;
-      struct intel_mipmap_tree *hiz_mt;
 
       /* Inter-tile (page-aligned) byte offsets. */
       uint32_t depth_offset, hiz_offset, stencil_offset;
@@ -1106,13 +1329,19 @@ struct brw_context
 
    struct {
       drm_intel_bo *bo;
-      struct gl_shader_program **programs;
+      struct gl_shader_program **shader_programs;
+      struct gl_program **programs;
       enum shader_time_shader_type *types;
       uint64_t *cumulative;
       int num_entries;
       int max_entries;
       double report_time;
    } shader_time;
+
+   __DRIcontext *driContext;
+   struct intel_screen *intelScreen;
+   void (*saved_viewport)(struct gl_context *ctx,
+                          GLint x, GLint y, GLsizei width, GLsizei height);
 };
 
 /*======================================================================
@@ -1136,24 +1365,33 @@ bool brwCreateContext(int api,
  * brw_misc_state.c
  */
 void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
+                                     uint32_t depth_level,
+                                     uint32_t depth_layer,
                                      struct intel_mipmap_tree *stencil_mt,
                                      uint32_t *out_tile_mask_x,
                                      uint32_t *out_tile_mask_y);
 void brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                            GLbitfield clear_mask);
 
+/* brw_object_purgeable.c */
+void brw_init_object_purgeable_functions(struct dd_function_table *functions);
+
 /*======================================================================
  * brw_queryobj.c
  */
-void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_init_common_queryobj_functions(struct dd_function_table *functions);
+void gen4_init_queryobj_functions(struct dd_function_table *functions);
 void brw_emit_query_begin(struct brw_context *brw);
 void brw_emit_query_end(struct brw_context *brw);
 
+/** gen6_queryobj.c */
+void gen6_init_queryobj_functions(struct dd_function_table *functions);
+
 /*======================================================================
  * brw_state_dump.c
  */
-void brw_debug_batch(struct intel_context *intel);
-void brw_annotate_aub(struct intel_context *intel);
+void brw_debug_batch(struct brw_context *brw);
+void brw_annotate_aub(struct brw_context *brw);
 
 /*======================================================================
  * brw_tex.c
@@ -1167,9 +1405,13 @@ void brw_validate_textures( struct brw_context *brw );
 void brwInitFragProgFuncs( struct dd_function_table *functions );
 
 int brw_get_scratch_size(int size);
-void brw_get_scratch_bo(struct intel_context *intel,
+void brw_get_scratch_bo(struct brw_context *brw,
                        drm_intel_bo **scratch_bo, int size);
 void brw_init_shader_time(struct brw_context *brw);
+int brw_get_shader_time_index(struct brw_context *brw,
+                              struct gl_shader_program *shader_prog,
+                              struct gl_program *prog,
+                              enum shader_time_shader_type type);
 void brw_collect_and_report_shader_time(struct brw_context *brw);
 void brw_destroy_shader_time(struct brw_context *brw);
 
@@ -1185,12 +1427,20 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
  */
 void brw_fs_alloc_reg_sets(struct brw_context *brw);
 
+/* brw_vec4_reg_allocate.cpp */
+void brw_vec4_alloc_reg_set(struct brw_context *brw);
+
 /* brw_disasm.c */
 int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 
 /* brw_vs.c */
 gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
 
+/* brw_draw_upload.c */
+unsigned brw_get_vertex_surface_type(struct brw_context *brw,
+                                     const struct gl_client_array *glarray);
+unsigned brw_get_index_type(GLenum type);
+
 /* brw_wm_surface_state.c */
 void brw_init_surface_formats(struct brw_context *brw);
 void
@@ -1202,6 +1452,11 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
                             struct gl_shader *shader,
                             uint32_t *surf_offsets);
 
+/* brw_surface_formats.c */
+bool brw_is_hiz_depth_format(struct brw_context *ctx, gl_format format);
+bool brw_render_target_supported(struct brw_context *brw,
+                                 struct gl_renderbuffer *rb);
+
 /* gen6_sol.c */
 void
 brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
@@ -1212,20 +1467,24 @@ brw_end_transform_feedback(struct gl_context *ctx,
 
 /* gen7_sol_state.c */
 void
+gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
+                              struct gl_transform_feedback_object *obj);
+void
 gen7_end_transform_feedback(struct gl_context *ctx,
                            struct gl_transform_feedback_object *obj);
 
 /* brw_blorp_blit.cpp */
 GLbitfield
-brw_blorp_framebuffer(struct intel_context *intel,
+brw_blorp_framebuffer(struct brw_context *brw,
                       GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                       GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                       GLbitfield mask, GLenum filter);
 
 bool
-brw_blorp_copytexsubimage(struct intel_context *intel,
+brw_blorp_copytexsubimage(struct brw_context *brw,
                           struct gl_renderbuffer *src_rb,
                           struct gl_texture_image *dst_image,
+                          int slice,
                           int srcX0, int srcY0,
                           int dstX0, int dstY0,
                           int width, int height);
@@ -1246,11 +1505,14 @@ gen6_get_sample_position(struct gl_context *ctx,
 
 /* gen7_urb.c */
 void
-gen7_allocate_push_constants(struct brw_context *brw);
+gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
+                              unsigned gs_size, unsigned fs_size);
 
 void
-gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
-                    GLuint vs_size, GLuint vs_start);
+gen7_emit_urb_state(struct brw_context *brw,
+                    unsigned nr_vs_entries, unsigned vs_size,
+                    unsigned vs_start, unsigned nr_gs_entries,
+                    unsigned gs_size, unsigned gs_start);
 
 
 
@@ -1303,14 +1565,12 @@ static inline uint32_t
 brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
                  uint32_t prog_offset)
 {
-   struct intel_context *intel = &brw->intel;
-
-   if (intel->gen >= 5) {
+   if (brw->gen >= 5) {
       /* Using state base address. */
       return prog_offset;
    }
 
-   drm_intel_bo_emit_reloc(intel->batch.bo,
+   drm_intel_bo_emit_reloc(brw->batch.bo,
                           state_offset,
                           brw->cache.bo,
                           prog_offset,
@@ -1320,7 +1580,8 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
 }
 
 bool brw_do_cubemap_normalize(struct exec_list *instructions);
-bool brw_lower_texture_gradients(struct exec_list *instructions);
+bool brw_lower_texture_gradients(struct brw_context *brw,
+                                 struct exec_list *instructions);
 
 struct opcode_desc {
     char    *name;
@@ -1330,6 +1591,43 @@ struct opcode_desc {
 
 extern const struct opcode_desc opcode_descs[128];
 
+void
+brw_emit_depthbuffer(struct brw_context *brw);
+
+void
+brw_emit_depth_stencil_hiz(struct brw_context *brw,
+                           struct intel_mipmap_tree *depth_mt,
+                           uint32_t depth_offset, uint32_t depthbuffer_format,
+                           uint32_t depth_surface_type,
+                           struct intel_mipmap_tree *stencil_mt,
+                           bool hiz, bool separate_stencil,
+                           uint32_t width, uint32_t height,
+                           uint32_t tile_x, uint32_t tile_y);
+
+void
+gen7_emit_depth_stencil_hiz(struct brw_context *brw,
+                            struct intel_mipmap_tree *depth_mt,
+                            uint32_t depth_offset, uint32_t depthbuffer_format,
+                            uint32_t depth_surface_type,
+                            struct intel_mipmap_tree *stencil_mt,
+                            bool hiz, bool separate_stencil,
+                            uint32_t width, uint32_t height,
+                            uint32_t tile_x, uint32_t tile_y);
+
+extern const GLuint prim_to_hw_prim[GL_POLYGON+1];
+
+void
+brw_setup_vec4_key_clip_info(struct brw_context *brw,
+                             struct brw_vec4_prog_key *key,
+                             bool program_uses_clip_distance);
+
+void
+gen6_upload_vec4_push_constants(struct brw_context *brw,
+                                const struct gl_program *prog,
+                                const struct brw_vec4_prog_data *prog_data,
+                                struct brw_stage_state *stage_state,
+                                enum state_struct_type type);
+
 #ifdef __cplusplus
 }
 #endif