i965: Use MESA_FORMAT_B8G8R8X8_SRGB for RGB visuals

[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.h
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index ed250d2c6a7f3952a7703a1091f8c4da36c8868d..1cc4c7b1282423cc506fe223dd2e90df7f4826a1 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -34,14 +34,11 @@
  #define BRWCONTEXT_INC
  
  #include <stdbool.h>
-#include <string.h>
-#include "main/imports.h"
  #include "main/macros.h"
-#include "main/mm.h"
  #include "main/mtypes.h"
  #include "brw_structs.h"
+#include "brw_compiler.h"
  #include "intel_aub.h"
-#include "program/prog_parameter.h"
  
  #ifdef __cplusplus
  extern "C" {
@@ -49,9 +46,7 @@ extern "C" {
          #define virtual virt
  #endif
  
-#include <drm.h>
  #include <intel_bufmgr.h>
-#include <i915_drm.h>
  #ifdef __cplusplus
         #undef virtual
  }
@@ -115,6 +110,12 @@ extern "C" {
   * enabled, it first passes them to a VS thread which is a good place
   * for the driver to implement any active vertex shader.
   *
+ * HS - Hull Shader (Tessellation Control Shader)
+ *
+ * TE - Tessellation Engine (Tessellation Primitive Generation)
+ *
+ * DS - Domain Shader (Tessellation Evaluation Shader)
+ *
   * GS - Geometry Shader.  This corresponds to a new DX10 concept.  If
   * enabled, incoming strips etc are passed to GS threads in individual
   * line/triangle/point units.  The GS thread may perform arbitrary
@@ -148,6 +149,8 @@ struct brw_vs_prog_key;
  struct brw_vue_prog_key;
  struct brw_wm_prog_key;
  struct brw_wm_prog_data;
+struct brw_cs_prog_key;
+struct brw_cs_prog_data;
  
  enum brw_pipeline {
     BRW_RENDER_PIPELINE,
@@ -163,7 +166,10 @@ enum brw_cache_id {
     BRW_CACHE_VS_PROG,
     BRW_CACHE_FF_GS_PROG,
     BRW_CACHE_GS_PROG,
+   BRW_CACHE_TCS_PROG,
+   BRW_CACHE_TES_PROG,
     BRW_CACHE_CLIP_PROG,
+   BRW_CACHE_CS_PROG,
  
     BRW_MAX_CACHE
  };
@@ -173,31 +179,34 @@ enum brw_state_id {
     BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
     BRW_STATE_FRAGMENT_PROGRAM,
     BRW_STATE_GEOMETRY_PROGRAM,
+   BRW_STATE_TESS_CTRL_PROGRAM,
+   BRW_STATE_TESS_EVAL_PROGRAM,
     BRW_STATE_VERTEX_PROGRAM,
     BRW_STATE_CURBE_OFFSETS,
     BRW_STATE_REDUCED_PRIMITIVE,
+   BRW_STATE_PATCH_PRIMITIVE,
     BRW_STATE_PRIMITIVE,
     BRW_STATE_CONTEXT,
     BRW_STATE_PSP,
     BRW_STATE_SURFACES,
-   BRW_STATE_VS_BINDING_TABLE,
-   BRW_STATE_GS_BINDING_TABLE,
-   BRW_STATE_PS_BINDING_TABLE,
+   BRW_STATE_BINDING_TABLE_POINTERS,
     BRW_STATE_INDICES,
     BRW_STATE_VERTICES,
     BRW_STATE_BATCH,
     BRW_STATE_INDEX_BUFFER,
     BRW_STATE_VS_CONSTBUF,
+   BRW_STATE_TCS_CONSTBUF,
+   BRW_STATE_TES_CONSTBUF,
     BRW_STATE_GS_CONSTBUF,
     BRW_STATE_PROGRAM_CACHE,
     BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_VUE_MAP_VS,
     BRW_STATE_VUE_MAP_GEOM_OUT,
     BRW_STATE_TRANSFORM_FEEDBACK,
     BRW_STATE_RASTERIZER_DISCARD,
     BRW_STATE_STATS_WM,
     BRW_STATE_UNIFORM_BUFFER,
     BRW_STATE_ATOMIC_BUFFER,
+   BRW_STATE_IMAGE_UNITS,
     BRW_STATE_META_IN_PROGRESS,
     BRW_STATE_INTERPOLATION_MAP,
     BRW_STATE_PUSH_CONSTANT_ALLOCATION,
@@ -209,6 +218,9 @@ enum brw_state_id {
     BRW_STATE_CLIP_VP,
     BRW_STATE_SAMPLER_STATE_TABLE,
     BRW_STATE_VS_ATTRIB_WORKAROUNDS,
+   BRW_STATE_COMPUTE_PROGRAM,
+   BRW_STATE_CS_WORK_GROUPS,
+   BRW_STATE_URB_SIZE,
     BRW_NUM_STATE_BITS
  };
  
@@ -243,20 +255,24 @@ enum brw_state_id {
  #define BRW_NEW_VS_PROG_DATA            (1ull << BRW_CACHE_VS_PROG)
  #define BRW_NEW_FF_GS_PROG_DATA         (1ull << BRW_CACHE_FF_GS_PROG)
  #define BRW_NEW_GS_PROG_DATA            (1ull << BRW_CACHE_GS_PROG)
+#define BRW_NEW_TCS_PROG_DATA           (1ull << BRW_CACHE_TCS_PROG)
+#define BRW_NEW_TES_PROG_DATA           (1ull << BRW_CACHE_TES_PROG)
  #define BRW_NEW_CLIP_PROG_DATA          (1ull << BRW_CACHE_CLIP_PROG)
+#define BRW_NEW_CS_PROG_DATA            (1ull << BRW_CACHE_CS_PROG)
  #define BRW_NEW_URB_FENCE               (1ull << BRW_STATE_URB_FENCE)
  #define BRW_NEW_FRAGMENT_PROGRAM        (1ull << BRW_STATE_FRAGMENT_PROGRAM)
  #define BRW_NEW_GEOMETRY_PROGRAM        (1ull << BRW_STATE_GEOMETRY_PROGRAM)
+#define BRW_NEW_TESS_EVAL_PROGRAM       (1ull << BRW_STATE_TESS_EVAL_PROGRAM)
+#define BRW_NEW_TESS_CTRL_PROGRAM       (1ull << BRW_STATE_TESS_CTRL_PROGRAM)
  #define BRW_NEW_VERTEX_PROGRAM          (1ull << BRW_STATE_VERTEX_PROGRAM)
  #define BRW_NEW_CURBE_OFFSETS           (1ull << BRW_STATE_CURBE_OFFSETS)
  #define BRW_NEW_REDUCED_PRIMITIVE       (1ull << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PATCH_PRIMITIVE         (1ull << BRW_STATE_PATCH_PRIMITIVE)
  #define BRW_NEW_PRIMITIVE               (1ull << BRW_STATE_PRIMITIVE)
  #define BRW_NEW_CONTEXT                 (1ull << BRW_STATE_CONTEXT)
  #define BRW_NEW_PSP                     (1ull << BRW_STATE_PSP)
  #define BRW_NEW_SURFACES                (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_VS_BINDING_TABLE        (1ull << BRW_STATE_VS_BINDING_TABLE)
-#define BRW_NEW_GS_BINDING_TABLE        (1ull << BRW_STATE_GS_BINDING_TABLE)
-#define BRW_NEW_PS_BINDING_TABLE        (1ull << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_BINDING_TABLE_POINTERS  (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
  #define BRW_NEW_INDICES                 (1ull << BRW_STATE_INDICES)
  #define BRW_NEW_VERTICES                (1ull << BRW_STATE_VERTICES)
  /**
@@ -267,16 +283,18 @@ enum brw_state_id {
  /** \see brw.state.depth_region */
  #define BRW_NEW_INDEX_BUFFER            (1ull << BRW_STATE_INDEX_BUFFER)
  #define BRW_NEW_VS_CONSTBUF             (1ull << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_TCS_CONSTBUF            (1ull << BRW_STATE_TCS_CONSTBUF)
+#define BRW_NEW_TES_CONSTBUF            (1ull << BRW_STATE_TES_CONSTBUF)
  #define BRW_NEW_GS_CONSTBUF             (1ull << BRW_STATE_GS_CONSTBUF)
  #define BRW_NEW_PROGRAM_CACHE           (1ull << BRW_STATE_PROGRAM_CACHE)
  #define BRW_NEW_STATE_BASE_ADDRESS      (1ull << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_VUE_MAP_VS              (1ull << BRW_STATE_VUE_MAP_VS)
  #define BRW_NEW_VUE_MAP_GEOM_OUT        (1ull << BRW_STATE_VUE_MAP_GEOM_OUT)
  #define BRW_NEW_TRANSFORM_FEEDBACK      (1ull << BRW_STATE_TRANSFORM_FEEDBACK)
  #define BRW_NEW_RASTERIZER_DISCARD      (1ull << BRW_STATE_RASTERIZER_DISCARD)
  #define BRW_NEW_STATS_WM                (1ull << BRW_STATE_STATS_WM)
  #define BRW_NEW_UNIFORM_BUFFER          (1ull << BRW_STATE_UNIFORM_BUFFER)
  #define BRW_NEW_ATOMIC_BUFFER           (1ull << BRW_STATE_ATOMIC_BUFFER)
+#define BRW_NEW_IMAGE_UNITS             (1ull << BRW_STATE_IMAGE_UNITS)
  #define BRW_NEW_META_IN_PROGRESS        (1ull << BRW_STATE_META_IN_PROGRESS)
  #define BRW_NEW_INTERPOLATION_MAP       (1ull << BRW_STATE_INTERPOLATION_MAP)
  #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
@@ -288,6 +306,9 @@ enum brw_state_id {
  #define BRW_NEW_CLIP_VP                 (1ull << BRW_STATE_CLIP_VP)
  #define BRW_NEW_SAMPLER_STATE_TABLE     (1ull << BRW_STATE_SAMPLER_STATE_TABLE)
  #define BRW_NEW_VS_ATTRIB_WORKAROUNDS   (1ull << BRW_STATE_VS_ATTRIB_WORKAROUNDS)
+#define BRW_NEW_COMPUTE_PROGRAM         (1ull << BRW_STATE_COMPUTE_PROGRAM)
+#define BRW_NEW_CS_WORK_GROUPS          (1ull << BRW_STATE_CS_WORK_GROUPS)
+#define BRW_NEW_URB_SIZE                (1ull << BRW_STATE_URB_SIZE)
  
  struct brw_state_flags {
     /** State update flags signalled by mesa internals */
@@ -305,6 +326,20 @@ struct brw_vertex_program {
  };
  
  
+/** Subclass of Mesa tessellation control program */
+struct brw_tess_ctrl_program {
+   struct gl_tess_ctrl_program program;
+   unsigned id;  /**< serial no. to identify tess ctrl progs, never re-used */
+};
+
+
+/** Subclass of Mesa tessellation evaluation program */
+struct brw_tess_eval_program {
+   struct gl_tess_eval_program program;
+   unsigned id;  /**< serial no. to identify tess eval progs, never re-used */
+};
+
+
  /** Subclass of Mesa geometry program */
  struct brw_geometry_program {
     struct gl_geometry_program program;
@@ -332,185 +367,6 @@ struct brw_shader {
     bool compiled_once;
  };
  
-/* Note: If adding fields that need anything besides a normal memcmp() for
- * comparing them, be sure to go fix brw_stage_prog_data_compare().
- */
-struct brw_stage_prog_data {
-   struct {
-      /** size of our binding table. */
-      uint32_t size_bytes;
-
-      /** @{
-       * surface indices for the various groups of surfaces
-       */
-      uint32_t pull_constants_start;
-      uint32_t texture_start;
-      uint32_t gather_texture_start;
-      uint32_t ubo_start;
-      uint32_t abo_start;
-      uint32_t image_start;
-      uint32_t shader_time_start;
-      /** @} */
-   } binding_table;
-
-   GLuint nr_params;       /**< number of float params/constants */
-   GLuint nr_pull_params;
-
-   unsigned curb_read_length;
-   unsigned total_scratch;
-
-   /**
-    * Register where the thread expects to find input data from the URB
-    * (typically uniforms, followed by vertex or fragment attributes).
-    */
-   unsigned dispatch_grf_start_reg;
-
-   bool use_alt_mode; /**< Use ALT floating point mode?  Otherwise, IEEE. */
-
-   /* Pointers to tracked values (only valid once
-    * _mesa_load_state_parameters has been called at runtime).
-    *
-    * These must be the last fields of the struct (see
-    * brw_stage_prog_data_compare()).
-    */
-   const gl_constant_value **param;
-   const gl_constant_value **pull_param;
-};
-
-/* Data about a particular attempt to compile a program.  Note that
- * there can be many of these, each in a different GL state
- * corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs.
- *
- * Note: brw_wm_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_wm_prog_data {
-   struct brw_stage_prog_data base;
-
-   GLuint num_varying_inputs;
-
-   GLuint dispatch_grf_start_reg_16;
-   GLuint reg_blocks;
-   GLuint reg_blocks_16;
-
-   struct {
-      /** @{
-       * surface indices the WM-specific surfaces
-       */
-      uint32_t render_target_start;
-      /** @} */
-   } binding_table;
-
-   uint8_t computed_depth_mode;
-
-   bool no_8;
-   bool dual_src_blend;
-   bool uses_pos_offset;
-   bool uses_omask;
-   bool uses_kill;
-   uint32_t prog_offset_16;
-
-   /**
-    * Mask of which interpolation modes are required by the fragment shader.
-    * Used in hardware setup on gen6+.
-    */
-   uint32_t barycentric_interp_modes;
-
-   /**
-    * Map from gl_varying_slot to the position within the FS setup data
-    * payload where the varying's attribute vertex deltas should be delivered.
-    * For varying slots that are not used by the FS, the value is -1.
-    */
-   int urb_setup[VARYING_SLOT_MAX];
-};
-
-/**
- * Enum representing the i965-specific vertex results that don't correspond
- * exactly to any element of gl_varying_slot.  The values of this enum are
- * assigned such that they don't conflict with gl_varying_slot.
- */
-typedef enum
-{
-   BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
-   BRW_VARYING_SLOT_PAD,
-   /**
-    * Technically this is not a varying but just a placeholder that
-    * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
-    * builtin variable to be compiled correctly. see compile_sf_prog() for
-    * more info.
-    */
-   BRW_VARYING_SLOT_PNTC,
-   BRW_VARYING_SLOT_COUNT
-} brw_varying_slot;
-
-
-/**
- * Data structure recording the relationship between the gl_varying_slot enum
- * and "slots" within the vertex URB entry (VUE).  A "slot" is defined as a
- * single octaword within the VUE (128 bits).
- *
- * Note that each BRW register contains 256 bits (2 octawords), so when
- * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
- * consecutive VUE slots.  When accessing the VUE in URB_INTERLEAVED mode (as
- * in a vertex shader), each register corresponds to a single VUE slot, since
- * it contains data for two separate vertices.
- */
-struct brw_vue_map {
-   /**
-    * Bitfield representing all varying slots that are (a) stored in this VUE
-    * map, and (b) actually written by the shader.  Does not include any of
-    * the additional varying slots defined in brw_varying_slot.
-    */
-   GLbitfield64 slots_valid;
-
-   /**
-    * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that are
-    * not stored in a slot (because they are not written, or because
-    * additional processing is applied before storing them in the VUE), the
-    * value is -1.
-    */
-   signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
-
-   /**
-    * Map from VUE slot to gl_varying_slot value.  For slots that do not
-    * directly correspond to a gl_varying_slot, the value comes from
-    * brw_varying_slot.
-    *
-    * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
-    * simplifies code that uses the value stored in slot_to_varying to
-    * create a bit mask).
-    */
-   signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
-
-   /**
-    * Total number of VUE slots in use
-    */
-   int num_slots;
-};
-
-/**
- * Convert a VUE slot number into a byte offset within the VUE.
- */
-static inline GLuint brw_vue_slot_to_offset(GLuint slot)
-{
-   return 16*slot;
-}
-
-/**
- * Convert a vertex output (brw_varying_slot) into a byte offset within the
- * VUE.
- */
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
-                                           GLuint varying)
-{
-   return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
-}
-
-void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
-                         GLbitfield64 slots_valid);
-
-
  /**
   * Bitmask indicating which fragment shader inputs represent varyings (and
   * hence have to be delivered to the fragment shader by the SF/SBE stage).
@@ -587,45 +443,18 @@ struct brw_ff_gs_prog_data {
     unsigned svbi_postincrement_value;
  };
  
-
-/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
-struct brw_vue_prog_data {
-   struct brw_stage_prog_data base;
-   struct brw_vue_map vue_map;
-
-   GLuint urb_read_length;
-   GLuint total_grf;
-
-   /* Used for calculating urb partitions.  In the VS, this is the size of the
-    * URB entry used for both input and output to the thread.  In the GS, this
-    * is the size of the URB entry used for output.
-    */
-   GLuint urb_entry_size;
-
-   bool simd8;
-};
-
-
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
-struct brw_vs_prog_data {
-   struct brw_vue_prog_data base;
-
-   GLbitfield64 inputs_read;
-
-   bool uses_vertexid;
-   bool uses_instanceid;
-};
-
  /** Number of texture sampler units */
  #define BRW_MAX_TEX_UNIT 32
  
  /** Max number of render targets in a shader */
  #define BRW_MAX_DRAW_BUFFERS 8
  
+/** Max number of UBOs in a shader */
+#define BRW_MAX_UBO 14
+
+/** Max number of SSBOs in a shader */
+#define BRW_MAX_SSBO 12
+
  /** Max number of atomic counter buffer objects in a shader */
  #define BRW_MAX_ABO 16
  
@@ -662,84 +491,15 @@ struct brw_vs_prog_data {
  
  #define BRW_MAX_SURFACES   (BRW_MAX_DRAW_BUFFERS +                      \
                              BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
-                            12 + /* ubo */                              \
+                            BRW_MAX_UBO +                               \
+                            BRW_MAX_SSBO +                              \
                              BRW_MAX_ABO +                               \
                              BRW_MAX_IMAGES +                            \
-                            2 /* shader time, pull constants */)
+                            2 + /* shader time, pull constants */       \
+                            1 /* cs num work groups */)
  
  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
  
-/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
-struct brw_gs_prog_data
-{
-   struct brw_vue_prog_data base;
-
-   /**
-    * Size of an output vertex, measured in HWORDS (32 bytes).
-    */
-   unsigned output_vertex_size_hwords;
-
-   unsigned output_topology;
-
-   /**
-    * Size of the control data (cut bits or StreamID bits), in hwords (32
-    * bytes).  0 if there is no control data.
-    */
-   unsigned control_data_header_size_hwords;
-
-   /**
-    * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
-    * if the control data is StreamID bits, or
-    * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
-    * Ignored if control_data_header_size is 0.
-    */
-   unsigned control_data_format;
-
-   bool include_primitive_id;
-
-   int invocations;
-
-   /**
-    * Dispatch mode, can be any of:
-    * GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
-    * GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
-    * GEN7_GS_DISPATCH_MODE_SINGLE
-    */
-   int dispatch_mode;
-
-   /**
-    * Gen6 transform feedback enabled flag.
-    */
-   bool gen6_xfb_enabled;
-
-   /**
-    * Gen6: Provoking vertex convention for odd-numbered triangles
-    * in tristrips.
-    */
-   GLuint pv_first:1;
-
-   /**
-    * Gen6: Number of varyings that are output to transform feedback.
-    */
-   GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
-
-   /**
-    * Gen6: Map from the index of a transform feedback binding table entry to the
-    * gl_varying_slot that should be streamed out through that binding table
-    * entry.
-    */
-   unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
-
-   /**
-    * Gen6: Map from the index of a transform feedback binding table entry to the
-    * swizzles that should be used when streaming out data through that
-    * binding table entry.
-    */
-   unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
-};
-
  /**
   * Stride in bytes between shader_time entries.
   *
@@ -767,9 +527,6 @@ struct brw_cache_item {
  };
  
  
-typedef bool (*cache_aux_compare_func)(const void *a, const void *b);
-typedef void (*cache_aux_free_func)(const void *aux);
-
  struct brw_cache {
     struct brw_context *brw;
  
@@ -779,15 +536,6 @@ struct brw_cache {
  
     uint32_t next_offset;
     bool bo_used_by_gpu;
-
-   /**
-    * Optional functions used in determining whether the prog_data for a new
-    * cache item matches an existing cache item (in case there's relevant data
-    * outside of the prog_data).  If NULL, a plain memcmp is done.
-    */
-   cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
-   /** Optional functions for freeing other pointers attached to a prog_data. */
-   cache_aux_free_func aux_free[BRW_MAX_CACHE];
  };
  
  
@@ -805,17 +553,12 @@ struct brw_tracked_state {
  enum shader_time_shader_type {
     ST_NONE,
     ST_VS,
-   ST_VS_WRITTEN,
-   ST_VS_RESET,
+   ST_TCS,
+   ST_TES,
     ST_GS,
-   ST_GS_WRITTEN,
-   ST_GS_RESET,
     ST_FS8,
-   ST_FS8_WRITTEN,
-   ST_FS8_RESET,
     ST_FS16,
-   ST_FS16_WRITTEN,
-   ST_FS16_RESET,
+   ST_CS,
  };
  
  struct brw_vertex_buffer {
@@ -848,13 +591,6 @@ struct brw_query_object {
     bool flushed;
  };
  
-struct intel_sync_object {
-   struct gl_sync_object Base;
-
-   /** Batch associated with this sync object */
-   drm_intel_bo *bo;
-};
-
  enum brw_gpu_ring {
     UNKNOWN_RING,
     RENDER_RING,
@@ -866,11 +602,12 @@ struct intel_batchbuffer {
     drm_intel_bo *bo;
     /** Last BO submitted to the hardware.  Used for glFinish(). */
     drm_intel_bo *last_bo;
-   /** BO for post-sync nonzero writes for gen6 workaround. */
-   drm_intel_bo *workaround_bo;
  
+#ifdef DEBUG
     uint16_t emit, total;
-   uint16_t used, reserved_space;
+#endif
+   uint16_t reserved_space;
+   uint32_t *map_next;
     uint32_t *map;
     uint32_t *cpu_map;
  #define BATCH_SZ (8192*sizeof(uint32_t))
@@ -879,14 +616,14 @@ struct intel_batchbuffer {
     enum brw_gpu_ring ring;
     bool needs_sol_reset;
  
-   uint8_t pipe_controls_since_last_cs_stall;
-
     struct {
-      uint16_t used;
+      uint32_t *map_next;
        int reloc_count;
     } saved;
  };
  
+#define MAX_GS_INPUT_VERTICES 6
+
  #define BRW_MAX_XFB_STREAMS 4
  
  struct brw_transform_feedback_object {
@@ -952,6 +689,24 @@ struct brw_stage_state
     uint32_t sampler_offset;
  };
  
+enum brw_predicate_state {
+   /* The first two states are used if we can determine whether to draw
+    * without having to look at the values in the query object buffer. This
+    * will happen if there is no conditional render in progress, if the query
+    * object is already completed or if something else has already added
+    * samples to the preliminary result such as via a BLT command.
+    */
+   BRW_PREDICATE_STATE_RENDER,
+   BRW_PREDICATE_STATE_DONT_RENDER,
+   /* In this case whether to draw or not depends on the result of an
+    * MI_PREDICATE command so the predicate enable bit needs to be checked.
+    */
+   BRW_PREDICATE_STATE_USE_BIT
+};
+
+struct shader_times;
+
+struct brw_l3_config;
  
  /**
   * brw_context is derived from gl_context.
@@ -966,11 +721,22 @@ struct brw_context
                                       unsigned unit,
                                       uint32_t *surf_offset,
                                       bool for_gather);
-      void (*update_renderbuffer_surface)(struct brw_context *brw,
-                                         struct gl_renderbuffer *rb,
-                                         bool layered,
-                                         unsigned unit);
-
+      uint32_t (*update_renderbuffer_surface)(struct brw_context *brw,
+                                              struct gl_renderbuffer *rb,
+                                              bool layered, unsigned unit,
+                                              uint32_t surf_index);
+
+      void (*emit_texture_surface_state)(struct brw_context *brw,
+                                         struct intel_mipmap_tree *mt,
+                                         GLenum target,
+                                         unsigned min_layer,
+                                         unsigned max_layer,
+                                         unsigned min_level,
+                                         unsigned max_level,
+                                         unsigned format,
+                                         unsigned swizzle,
+                                         uint32_t *surf_offset,
+                                         bool rw, bool for_gather);
        void (*emit_buffer_surface_state)(struct brw_context *brw,
                                          uint32_t *out_offset,
                                          drm_intel_bo *bo,
@@ -1005,6 +771,10 @@ struct brw_context
  
     drm_intel_context *hw_ctx;
  
+   /** BO for post-sync nonzero writes for gen6 workaround. */
+   drm_intel_bo *workaround_bo;
+   uint8_t pipe_controls_since_last_cs_stall;
+
     /**
      * Set of drm_intel_bo * that have been rendered to within this batchbuffer
      * and would need flushing before being used from another cache domain that
@@ -1016,7 +786,7 @@ struct brw_context
      * Number of resets observed in the system at context creation.
      *
      * This is tracked in the context so that we can determine that another
-    * reset has occured.
+    * reset has occurred.
      */
     uint32_t reset_count;
  
@@ -1029,7 +799,7 @@ struct brw_context
     } upload;
  
     /**
-    * Set if rendering has occured to the drawable's front buffer.
+    * Set if rendering has occurred to the drawable's front buffer.
      *
      * This is used in the DRI2 case to detect that glFlush should also copy
      * the contents of the fake front buffer to the real front buffer.
@@ -1083,15 +853,12 @@ struct brw_context
  
     int gen;
     int gt;
-   /* GT revision. This will be -1 if the revision couldn't be determined (eg,
-    * if the kernel doesn't support the query).
-    */
-   int revision;
  
     bool is_g4x;
     bool is_baytrail;
     bool is_haswell;
     bool is_cherryview;
+   bool is_broxton;
  
     bool has_hiz;
     bool has_separate_stencil;
@@ -1104,7 +871,12 @@ struct brw_context
     bool has_pln;
     bool no_simd8;
     bool use_rep_send;
-   bool scalar_vs;
+   bool use_resource_streamer;
+
+   /**
+    * Whether LRI can be used to write register values from the batch buffer.
+    */
+   bool can_do_pipelined_register_writes;
  
     /**
      * Some versions of Gen hardware don't do centroid interpolation correctly
@@ -1117,10 +889,11 @@ struct brw_context
  
     GLuint NewGLState;
     struct {
-      struct brw_state_flags dirty;
        struct brw_state_flags pipelines[BRW_NUM_PIPELINES];
     } state;
  
+   enum brw_pipeline last_pipeline;
+
     struct brw_cache cache;
  
     /** IDs for meta stencil blit shader programs. */
@@ -1147,6 +920,17 @@ struct brw_context
        uint32_t draw_params_offset;
     } draw;
  
+   struct {
+      /**
+       * For gl_NumWorkGroups: If num_work_groups_bo is non NULL, then it is
+       * an indirect call, and num_work_groups_offset is valid. Otherwise,
+       * num_work_groups is set based on glDispatchCompute.
+       */
+      drm_intel_bo *num_work_groups_bo;
+      GLintptr num_work_groups_offset;
+      const GLuint *num_work_groups;
+   } compute;
+
     struct {
        struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
        struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
@@ -1197,7 +981,10 @@ struct brw_context
      */
     const struct gl_vertex_program *vertex_program;
     const struct gl_geometry_program *geometry_program;
+   const struct gl_tess_ctrl_program *tess_ctrl_program;
+   const struct gl_tess_eval_program *tess_eval_program;
     const struct gl_fragment_program *fragment_program;
+   const struct gl_compute_program *compute_program;
  
     /**
      * Number of samples in ctx->DrawBuffer, updated by BRW_NEW_NUM_SAMPLES so
@@ -1209,11 +996,12 @@ struct brw_context
      * Platform specific constants containing the maximum number of threads
      * for each pipeline stage.
      */
-   int max_vs_threads;
-   int max_hs_threads;
-   int max_ds_threads;
-   int max_gs_threads;
-   int max_wm_threads;
+   unsigned max_vs_threads;
+   unsigned max_hs_threads;
+   unsigned max_ds_threads;
+   unsigned max_gs_threads;
+   unsigned max_wm_threads;
+   unsigned max_cs_threads;
  
     /* BRW_NEW_URB_ALLOCATIONS:
      */
@@ -1242,7 +1030,13 @@ struct brw_context
        GLuint clip_start;
        GLuint sf_start;
        GLuint cs_start;
-      GLuint size; /* Hardware URB size, in KB. */
+      /**
+       * URB size in the current configuration.  The units this is expressed
+       * in are somewhat inconsistent, see brw_device_info::urb::size.
+       *
+       * FINISHME: Represent the URB size consistently in KB on all platforms.
+       */
+      GLuint size;
  
        /* True if the most recently sent _3DSTATE_URB message allocated
         * URB space for the GS.
@@ -1271,17 +1065,9 @@ struct brw_context
        GLuint curbe_offset;
     } curbe;
  
-   /**
-    * Layout of vertex data exiting the vertex shader.
-    *
-    * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
-    */
-   struct brw_vue_map vue_map_vs;
-
     /**
     * Layout of vertex data exiting the geometry portion of the pipeline.
-    * This comes from the geometry shader if one exists, otherwise from the
-    * vertex shader.
+    * This comes from the last enabled shader stage (GS, DS, or VS).
      *
      * BRW_NEW_VUE_MAP_GEOM_OUT is flagged when the VUE map changes.
      */
@@ -1292,6 +1078,28 @@ struct brw_context
        struct brw_vs_prog_data *prog_data;
     } vs;
  
+   struct {
+      struct brw_stage_state base;
+      struct brw_tcs_prog_data *prog_data;
+
+      /**
+       * True if the 3DSTATE_HS command most recently emitted to the 3D
+       * pipeline enabled the HS; false otherwise.
+       */
+      bool enabled;
+   } tcs;
+
+   struct {
+      struct brw_stage_state base;
+      struct brw_tes_prog_data *prog_data;
+
+      /**
+       * True if the 3DSTATE_DS command most recently emitted to the 3D
+       * pipeline enabled the DS; false otherwise.
+       */
+      bool enabled;
+   } tes;
+
     struct {
        struct brw_stage_state base;
        struct brw_gs_prog_data *prog_data;
@@ -1358,8 +1166,20 @@ struct brw_context
         */
        drm_intel_bo *multisampled_null_render_target_bo;
        uint32_t fast_clear_op;
+
+      float offset_clamp;
     } wm;
  
+   struct {
+      struct brw_stage_state base;
+      struct brw_cs_prog_data *prog_data;
+   } cs;
+
+   /* RS hardware binding table */
+   struct {
+      drm_intel_bo *bo;
+      uint32_t next_offset;
+   } hw_bt_pool;
  
     struct {
        uint32_t state_offset;
@@ -1373,6 +1193,11 @@ struct brw_context
        bool begin_emitted;
     } query;
  
+   struct {
+      enum brw_predicate_state state;
+      bool supported;
+   } predicate;
+
     struct {
        /** A map from pipeline statistics counter IDs to MMIO addresses. */
        const int *statistics_registers;
@@ -1411,14 +1236,15 @@ struct brw_context
     } perfmon;
  
     int num_atoms[BRW_NUM_PIPELINES];
-   const struct brw_tracked_state render_atoms[57];
-   const struct brw_tracked_state compute_atoms[1];
+   const struct brw_tracked_state render_atoms[76];
+   const struct brw_tracked_state compute_atoms[10];
  
     /* If (INTEL_DEBUG & DEBUG_BATCH) */
     struct {
        uint32_t offset;
        uint32_t size;
        enum aub_state_struct_type type;
+      int index;
     } *state_batch_list;
     int state_batch_count;
  
@@ -1453,12 +1279,16 @@ struct brw_context
     uint32_t num_instances;
     int basevertex;
  
+   struct {
+      const struct brw_l3_config *config;
+   } l3;
+
     struct {
        drm_intel_bo *bo;
-      struct gl_shader_program **shader_programs;
-      struct gl_program **programs;
+      const char **names;
+      int *ids;
        enum shader_time_shader_type *types;
-      uint64_t *cumulative;
+      struct shader_times *cumulative;
        int num_entries;
        int max_entries;
        double report_time;
@@ -1572,12 +1402,21 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx);
  void brw_store_register_mem64(struct brw_context *brw,
                                drm_intel_bo *bo, uint32_t reg, int idx);
  
+/** brw_conditional_render.c */
+void brw_init_conditional_render_functions(struct dd_function_table *functions);
+bool brw_check_conditional_render(struct brw_context *brw);
+
  /** intel_batchbuffer.c */
  void brw_load_register_mem(struct brw_context *brw,
                             uint32_t reg,
                             drm_intel_bo *bo,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t offset);
+void brw_load_register_mem64(struct brw_context *brw,
+                             uint32_t reg,
+                             drm_intel_bo *bo,
+                             uint32_t read_domains, uint32_t write_domain,
+                             uint32_t offset);
  
  /*======================================================================
   * brw_state_dump.c
@@ -1594,9 +1433,24 @@ void brw_validate_textures( struct brw_context *brw );
  /*======================================================================
   * brw_program.c
   */
+static inline bool
+key_debug(struct brw_context *brw, const char *name, int a, int b)
+{
+   if (a != b) {
+      perf_debug("  %s %d->%d\n", name, a, b);
+      return true;
+   }
+   return false;
+}
+
  void brwInitFragProgFuncs( struct dd_function_table *functions );
  
-int brw_get_scratch_size(int size);
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+static inline int
+brw_get_scratch_size(int size)
+{
+   return util_next_power_of_two(size | 1023);
+}
  void brw_get_scratch_bo(struct brw_context *brw,
                         drm_intel_bo **scratch_bo, int size);
  void brw_init_shader_time(struct brw_context *brw);
@@ -1617,13 +1471,13 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
  
  /* brw_fs_reg_allocate.cpp
   */
-void brw_fs_alloc_reg_sets(struct intel_screen *screen);
+void brw_fs_alloc_reg_sets(struct brw_compiler *compiler);
  
  /* brw_vec4_reg_allocate.cpp */
-void brw_vec4_alloc_reg_set(struct intel_screen *screen);
+void brw_vec4_alloc_reg_set(struct brw_compiler *compiler);
  
  /* brw_disasm.c */
-int brw_disassemble_inst(FILE *file, struct brw_context *brw,
+int brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
                           struct brw_inst *inst, bool is_compacted);
  
  /* brw_vs.c */
@@ -1661,8 +1515,12 @@ void brw_create_constant_surface(struct brw_context *brw,
                                   drm_intel_bo *bo,
                                   uint32_t offset,
                                   uint32_t size,
-                                 uint32_t *out_offset,
-                                 bool dword_pitch);
+                                 uint32_t *out_offset);
+void brw_create_buffer_surface(struct brw_context *brw,
+                               drm_intel_bo *bo,
+                               uint32_t offset,
+                               uint32_t size,
+                               uint32_t *out_offset);
  void brw_update_buffer_texture_surface(struct gl_context *ctx,
                                         unsigned unit,
                                         uint32_t *surf_offset);
@@ -1674,18 +1532,24 @@ brw_update_sol_surface(struct brw_context *brw,
  void brw_upload_ubo_surfaces(struct brw_context *brw,
                              struct gl_shader *shader,
                               struct brw_stage_state *stage_state,
-                             struct brw_stage_prog_data *prog_data,
-                             bool dword_pitch);
+                             struct brw_stage_prog_data *prog_data);
  void brw_upload_abo_surfaces(struct brw_context *brw,
-                             struct gl_shader_program *prog,
+                             struct gl_shader *shader,
                               struct brw_stage_state *stage_state,
                               struct brw_stage_prog_data *prog_data);
+void brw_upload_image_surfaces(struct brw_context *brw,
+                               struct gl_shader *shader,
+                               struct brw_stage_state *stage_state,
+                               struct brw_stage_prog_data *prog_data);
  
  /* brw_surface_formats.c */
-bool brw_is_hiz_depth_format(struct brw_context *ctx, mesa_format format);
  bool brw_render_target_supported(struct brw_context *brw,
                                   struct gl_renderbuffer *rb);
+bool brw_losslessly_compressible_format(struct brw_context *brw,
+                                        uint32_t brw_format);
  uint32_t brw_depth_format(struct brw_context *brw, mesa_format format);
+mesa_format brw_lower_mesa_image_format(const struct brw_device_info *devinfo,
+                                        mesa_format format);
  
  /* brw_performance_monitor.c */
  void brw_init_performance_monitors(struct brw_context *brw);
@@ -1797,6 +1661,10 @@ gen7_emit_urb_state(struct brw_context *brw,
  extern GLenum
  brw_get_graphics_reset_status(struct gl_context *ctx);
  
+/* brw_compute.c */
+extern void
+brw_init_compute_functions(struct dd_function_table *functions);
+
  /*======================================================================
   * Inline conversion functions.  These are better-typed than the
   * macros used previously:
@@ -1837,6 +1705,12 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
     return (const struct brw_fragment_program *) p;
  }
  
+static inline struct brw_compute_program *
+brw_compute_program(struct gl_compute_program *p)
+{
+   return (struct brw_compute_program *) p; /* plain pointer cast, no check */
+}
+
  /**
   * Pre-gen6, the register file of the EUs was shared between threads,
   * and each thread used some subset allocated on a 16-register block
@@ -1879,6 +1753,7 @@ struct opcode_desc {
  
  extern const struct opcode_desc opcode_descs[128];
  extern const char * const conditional_modifier[16];
+extern const char *const pred_ctrl_align16[16];
  
  void
  brw_emit_depthbuffer(struct brw_context *brw);
@@ -1927,11 +1802,6 @@ void gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
  
  uint32_t get_hw_prim_for_gl_prim(int mode);
  
-void
-brw_setup_vue_key_clip_info(struct brw_context *brw,
-                            struct brw_vue_prog_key *key,
-                            bool program_uses_clip_distance);
-
  void
  gen6_upload_push_constants(struct brw_context *brw,
                             const struct gl_program *prog,
@@ -1939,6 +1809,25 @@ gen6_upload_push_constants(struct brw_context *brw,
                             struct brw_stage_state *stage_state,
                             enum aub_state_struct_type type);
  
+bool
+gen9_use_linear_1d_layout(const struct brw_context *brw,
+                          const struct intel_mipmap_tree *mt);
+
+/* brw_pipe_control.c */
+int brw_init_pipe_control(struct brw_context *brw,
+                         const struct brw_device_info *info);
+void brw_fini_pipe_control(struct brw_context *brw);
+
+void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
+void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
+                                 drm_intel_bo *bo, uint32_t offset,
+                                 uint32_t imm_lower, uint32_t imm_upper);
+void brw_emit_mi_flush(struct brw_context *brw);
+void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
+void brw_emit_depth_stall_flushes(struct brw_context *brw);
+void gen7_emit_vs_workaround_flush(struct brw_context *brw);
+void gen7_emit_cs_stall_flush(struct brw_context *brw);
+
  #ifdef __cplusplus
  }
  #endif