X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_context.h;h=fa2fc72e435e41568cedc999a77bcad010464492;hb=77a18428fffc938a4e3fa9b592e3e104dda0fe7f;hp=7069724466ab5f4e7693eac4757849143df8657a;hpb=652901e95b4ed406293d0e1fabee857c054119b1;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7069724466a..fa2fc72e435 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -120,32 +120,58 @@
 
 struct brw_context;
 
-#define BRW_NEW_URB_FENCE               0x1
-#define BRW_NEW_FRAGMENT_PROGRAM        0x2
-#define BRW_NEW_VERTEX_PROGRAM          0x4
-#define BRW_NEW_INPUT_DIMENSIONS        0x8
-#define BRW_NEW_CURBE_OFFSETS           0x10
-#define BRW_NEW_REDUCED_PRIMITIVE       0x20
-#define BRW_NEW_PRIMITIVE               0x40
-#define BRW_NEW_CONTEXT                 0x80
-#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
-#define BRW_NEW_PSP                     0x800
-#define BRW_NEW_WM_SURFACES		0x1000
-#define BRW_NEW_BINDING_TABLE		0x2000
-#define BRW_NEW_INDICES			0x4000
-#define BRW_NEW_VERTICES		0x8000
+enum brw_state_id {
+   BRW_STATE_URB_FENCE,
+   BRW_STATE_FRAGMENT_PROGRAM,
+   BRW_STATE_VERTEX_PROGRAM,
+   BRW_STATE_INPUT_DIMENSIONS,
+   BRW_STATE_CURBE_OFFSETS,
+   BRW_STATE_REDUCED_PRIMITIVE,
+   BRW_STATE_PRIMITIVE,
+   BRW_STATE_CONTEXT,
+   BRW_STATE_WM_INPUT_DIMENSIONS,
+   BRW_STATE_PSP,
+   BRW_STATE_WM_SURFACES,
+   BRW_STATE_VS_BINDING_TABLE,
+   BRW_STATE_GS_BINDING_TABLE,
+   BRW_STATE_PS_BINDING_TABLE,
+   BRW_STATE_INDICES,
+   BRW_STATE_VERTICES,
+   BRW_STATE_BATCH,
+   BRW_STATE_NR_WM_SURFACES,
+   BRW_STATE_NR_VS_SURFACES,
+   BRW_STATE_INDEX_BUFFER,
+   BRW_STATE_VS_CONSTBUF,
+   BRW_STATE_PROGRAM_CACHE,
+   BRW_STATE_STATE_BASE_ADDRESS,
+};
+
+#define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
+#define BRW_NEW_FRAGMENT_PROGRAM        (1 << BRW_STATE_FRAGMENT_PROGRAM)
+#define BRW_NEW_VERTEX_PROGRAM          (1 << BRW_STATE_VERTEX_PROGRAM)
+#define BRW_NEW_INPUT_DIMENSIONS        (1 << BRW_STATE_INPUT_DIMENSIONS)
+#define BRW_NEW_CURBE_OFFSETS           (1 << BRW_STATE_CURBE_OFFSETS)
+#define BRW_NEW_REDUCED_PRIMITIVE       (1 << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PRIMITIVE               (1 << BRW_STATE_PRIMITIVE)
+#define BRW_NEW_CONTEXT                 (1 << BRW_STATE_CONTEXT)
+#define BRW_NEW_WM_INPUT_DIMENSIONS     (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
+#define BRW_NEW_PSP                     (1 << BRW_STATE_PSP)
+#define BRW_NEW_WM_SURFACES		(1 << BRW_STATE_WM_SURFACES)
+#define BRW_NEW_VS_BINDING_TABLE	(1 << BRW_STATE_VS_BINDING_TABLE)
+#define BRW_NEW_GS_BINDING_TABLE	(1 << BRW_STATE_GS_BINDING_TABLE)
+#define BRW_NEW_PS_BINDING_TABLE	(1 << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_INDICES			(1 << BRW_STATE_INDICES)
+#define BRW_NEW_VERTICES		(1 << BRW_STATE_VERTICES)
 /**
  * Used for any batch entry with a relocated pointer that will be used
  * by any 3D rendering.
  */
-#define BRW_NEW_BATCH			0x10000
-/** brw->depth_region updated */
-#define BRW_NEW_DEPTH_BUFFER		0x20000
-#define BRW_NEW_NR_WM_SURFACES		0x40000
-#define BRW_NEW_NR_VS_SURFACES		0x80000
-#define BRW_NEW_INDEX_BUFFER		0x100000
-#define BRW_NEW_VS_CONSTBUF		0x200000
-#define BRW_NEW_WM_CONSTBUF		0x200000
+#define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
+/** Signals that the index buffer state in struct brw_context was updated. */
+#define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
+#define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE		(1 << BRW_STATE_PROGRAM_CACHE)
+#define BRW_NEW_STATE_BASE_ADDRESS	(1 << BRW_STATE_STATE_BASE_ADDRESS)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -158,12 +184,38 @@ struct brw_state_flags {
    GLuint cache;
 };
 
+enum state_struct_type {
+   AUB_TRACE_VS_STATE =			1,
+   AUB_TRACE_GS_STATE =			2,
+   AUB_TRACE_CLIP_STATE =		3,
+   AUB_TRACE_SF_STATE =			4,
+   AUB_TRACE_WM_STATE =			5,
+   AUB_TRACE_CC_STATE =			6,
+   AUB_TRACE_CLIP_VP_STATE =		7,
+   AUB_TRACE_SF_VP_STATE =		8,
+   AUB_TRACE_CC_VP_STATE =		0x9,
+   AUB_TRACE_SAMPLER_STATE =		0xa,
+   AUB_TRACE_KERNEL_INSTRUCTIONS =	0xb,
+   AUB_TRACE_SCRATCH_SPACE =		0xc,
+   AUB_TRACE_SAMPLER_DEFAULT_COLOR =    0xd,
+
+   AUB_TRACE_SCISSOR_STATE =		0x15,
+   AUB_TRACE_BLEND_STATE =		0x16,
+   AUB_TRACE_DEPTH_STENCIL_STATE =	0x17,
+
+   /* Not written to .aub files the same way the structures above are. */
+   AUB_TRACE_NO_TYPE =			0x100,
+   AUB_TRACE_BINDING_TABLE =		0x101,
+   AUB_TRACE_SURFACE_STATE =		0x102,
+   AUB_TRACE_VS_CONSTANTS =		0x103,
+   AUB_TRACE_WM_CONSTANTS =		0x104,
+};
 
 /** Subclass of Mesa vertex program */
 struct brw_vertex_program {
    struct gl_vertex_program program;
    GLuint id;
-   GLboolean use_const_buffer;
+   bool use_const_buffer;
 };
 
 
@@ -171,9 +223,6 @@ struct brw_vertex_program {
 struct brw_fragment_program {
    struct gl_fragment_program program;
    GLuint id;  /**< serial no. to identify frag progs, never re-used */
-
-   /** for debugging, which texture units are referenced */
-   GLbitfield tex_units_used;
 };
 
 struct brw_shader {
@@ -192,6 +241,7 @@ enum param_conversion {
    PARAM_CONVERT_F2I,
    PARAM_CONVERT_F2U,
    PARAM_CONVERT_F2B,
+   PARAM_CONVERT_ZERO,
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -204,13 +254,16 @@ struct brw_wm_prog_data {
    GLuint urb_read_length;
 
    GLuint first_curbe_grf;
-   GLuint total_grf;
+   GLuint first_curbe_grf_16;
+   GLuint reg_blocks;
+   GLuint reg_blocks_16;
    GLuint total_scratch;
 
    GLuint nr_params;       /**< number of float params/constants */
    GLuint nr_pull_params;
-   GLboolean error;
+   bool error;
    int dispatch_width;
+   uint32_t prog_offset_16;
 
    /* Pointer to tracked values (only valid once
     * _mesa_load_state_parameters has been called at runtime).
@@ -221,6 +274,75 @@ struct brw_wm_prog_data {
    enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
 };
 
+/**
+ * Enum representing the i965-specific vertex results that don't correspond
+ * exactly to any element of gl_vert_result.  The values of this enum are
+ * assigned such that they don't conflict with gl_vert_result.
+ */
+typedef enum
+{
+   BRW_VERT_RESULT_NDC = VERT_RESULT_MAX,
+   BRW_VERT_RESULT_HPOS_DUPLICATE,
+   BRW_VERT_RESULT_PAD,
+   BRW_VERT_RESULT_MAX
+} brw_vert_result;
+
+
+/**
+ * Data structure recording the relationship between the gl_vert_result enum
+ * and "slots" within the vertex URB entry (VUE).  A "slot" is defined as a
+ * single octaword within the VUE (128 bits).
+ *
+ * Note that each BRW register contains 256 bits (2 octawords), so when
+ * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
+ * consecutive VUE slots.  When accessing the VUE in URB_INTERLEAVED mode (as
+ * in a vertex shader), each register corresponds to a single VUE slot, since
+ * it contains data for two separate vertices.
+ */
+struct brw_vue_map {
+   /**
+    * Map from gl_vert_result value to VUE slot.  For gl_vert_results that are
+    * not stored in a slot (because they are not written, or because
+    * additional processing is applied before storing them in the VUE), the
+    * value is -1.
+    */
+   int vert_result_to_slot[BRW_VERT_RESULT_MAX];
+
+   /**
+    * Map from VUE slot to gl_vert_result value.  For slots that do not
+    * directly correspond to a gl_vert_result, the value comes from
+    * brw_vert_result.
+    *
+    * For slots that are not in use, the value is BRW_VERT_RESULT_MAX (this
+    * simplifies code that uses the value stored in slot_to_vert_result to
+    * create a bit mask).
+    */
+   int slot_to_vert_result[BRW_VERT_RESULT_MAX];
+
+   /**
+    * Total number of VUE slots in use
+    */
+   int num_slots;
+};
+
+/**
+ * Convert a VUE slot number into a byte offset within the VUE.
+ */
+static inline GLuint brw_vue_slot_to_offset(GLuint slot)
+{
+   return 16*slot;
+}
+
+/**
+ * Convert a vert_result into a byte offset within the VUE.
+ */
+static inline GLuint brw_vert_result_to_offset(struct brw_vue_map *vue_map,
+                                               GLuint vert_result)
+{
+   return brw_vue_slot_to_offset(vue_map->vert_result_to_slot[vert_result]);
+}
+
+
 struct brw_sf_prog_data {
    GLuint urb_read_length;
    GLuint total_grf;
@@ -252,12 +374,20 @@ struct brw_vs_prog_data {
    GLuint total_grf;
    GLbitfield64 outputs_written;
    GLuint nr_params;       /**< number of float params/constants */
+   GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */
+   GLuint total_scratch;
 
    GLuint inputs_read;
 
    /* Used for calculating urb partitions:
     */
    GLuint urb_entry_size;
+
+   const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
+   const float *pull_param[MAX_UNIFORMS * 4];
+
+   bool uses_new_param_layout;
+   bool uses_vertexid;
 };
 
 
@@ -275,31 +405,48 @@ struct brw_vs_ouput_sizes {
 #define BRW_MAX_DRAW_BUFFERS 8
 
 /**
- * Size of our surface binding table for the WM.
- * This contains pointers to the drawing surfaces and current texture
- * objects and shader constant buffers (+2).
- */
-#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
-
-/**
- * Helpers to convert drawing buffers, textures and constant buffers
- * to surface binding table indexes, for WM.
+ * Helpers to create Surface Binding Table indexes for draw buffers,
+ * textures, and constant buffers.
+ *
+ * Shader threads access surfaces via numeric handles, rather than directly
+ * using pointers.  The binding table maps these numeric handles to the
+ * address of the actual buffer.
+ *
+ * For example, a shader might ask to sample from "surface 7."  In this case,
+ * bind[7] would contain a pointer to a texture.
+ *
+ * Although the hardware supports separate binding tables per pipeline stage
+ * (VS, HS, DS, GS, PS), we currently share a single binding table for all of
+ * them.  This is purely for convenience.
+ *
+ * Currently our binding tables are (arbitrarily) programmed as follows:
+ *
+ *    +-------------------------------+
+ *    |   0 | Draw buffer 0           | .
+ *    |   . |     .                   |  \
+ *    |   : |     :                   |   > Only relevant to the WM.
+ *    |   7 | Draw buffer 7           |  /
+ *    |-----|-------------------------| `
+ *    |   8 | VS Pull Constant Buffer |
+ *    |   9 | WM Pull Constant Buffer |
+ *    |-----|-------------------------|
+ *    |  10 | Texture 0               |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  25 | Texture 15              |
+ *    +-------------------------------+
+ *
+ * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
+ * the identity function or things will break.  We do want to keep draw buffers
+ * first so we can use headerless render target writes for RT 0.
  */
 #define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) 
-#define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 1 + (t))
-
-/**
- * Size of surface binding table for the VS.
- * Only one constant buffer for now.
- */
-#define BRW_VS_MAX_SURF 1
-
-/**
- * Only a VS constant buffer
- */
-#define SURF_INDEX_VERT_CONST_BUFFER 0
+#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
+#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
+#define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
 
+/** Maximum size of the binding table. */
+#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
 
 enum brw_cache_id {
    BRW_BLEND_STATE,
@@ -308,7 +455,6 @@ enum brw_cache_id {
    BRW_CC_VP,
    BRW_CC_UNIT,
    BRW_WM_PROG,
-   BRW_SAMPLER_DEFAULT_COLOR,
    BRW_SAMPLER,
    BRW_WM_UNIT,
    BRW_SF_PROG,
@@ -335,11 +481,11 @@ struct brw_cache_item {
    /** 32-bit hash of the key data */
    GLuint hash;
    GLuint key_size;		/* for variable-sized keys */
+   GLuint aux_size;
    const void *key;
-   drm_intel_bo **reloc_bufs;
-   GLuint nr_reloc_bufs;
 
-   drm_intel_bo *bo;
+   uint32_t offset;
+   uint32_t size;
 
    struct brw_cache_item *next;
 };   
@@ -350,14 +496,11 @@ struct brw_cache {
    struct brw_context *brw;
 
    struct brw_cache_item **items;
+   drm_intel_bo *bo;
    GLuint size, n_items;
 
-   char *name[BRW_MAX_CACHE];
-
-   /* Record of the last BOs chosen for each cache_id.  Used to set
-    * brw->state.dirty.cache when a new cache item is chosen.
-    */
-   drm_intel_bo *last_bo[BRW_MAX_CACHE];
+   uint32_t next_offset;
+   bool bo_used_by_gpu;
 };
 
 
@@ -369,7 +512,6 @@ struct brw_cache {
  */
 struct brw_tracked_state {
    struct brw_state_flags dirty;
-   void (*prepare)( struct brw_context *brw );
    void (*emit)( struct brw_context *brw );
 };
 
@@ -381,7 +523,6 @@ struct brw_tracked_state {
 #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
 #define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
 #define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
-#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR)
 #define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
 #define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
 #define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
@@ -408,21 +549,24 @@ struct brw_cached_batch_item {
  */
 #define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
 
+struct brw_vertex_buffer {
+   /** Buffer object containing the uploaded vertex data */
+   drm_intel_bo *bo;
+   uint32_t offset;
+   /** Byte stride between elements in the uploaded array */
+   GLuint stride;
+};
 struct brw_vertex_element {
    const struct gl_client_array *glarray;
 
+   int buffer;
+
    /** The corresponding Mesa vertex attribute */
    gl_vert_attrib attrib;
    /** Size of a complete element */
    GLuint element_size;
-   /** Number of uploaded elements for this input. */
-   GLuint count;
-   /** Byte stride between elements in the uploaded array */
-   GLuint stride;
    /** Offset of the first element within the buffer object */
    unsigned int offset;
-   /** Buffer object containing the uploaded vertex data */
-   drm_intel_bo *bo;
 };
 
 
@@ -449,35 +593,18 @@ struct brw_query_object {
 struct brw_context 
 {
    struct intel_context intel;  /**< base class, must be first field */
-   GLuint primitive;
-
-   GLboolean emit_state_always;
-   GLboolean has_surface_tile_offset;
-   GLboolean has_compr4;
-   GLboolean has_negative_rhw_bug;
-   GLboolean has_aa_line_parameters;
-   GLboolean has_pln;
-;
-   struct {
-      struct brw_state_flags dirty;
+   GLuint primitive; /**< Hardware primitive, such as _3DPRIM_TRILIST. */
 
-      GLuint nr_color_regions;
-      struct intel_region *color_regions[MAX_DRAW_BUFFERS];
-      struct intel_region *depth_region;
+   bool emit_state_always;
+   bool has_surface_tile_offset;
+   bool has_compr4;
+   bool has_negative_rhw_bug;
+   bool has_aa_line_parameters;
+   bool has_pln;
+   bool new_vs_backend;
 
-      /**
-       * List of buffers accumulated in brw_validate_state to receive
-       * drm_intel_bo_check_aperture treatment before exec, so we can
-       * know if we should flush the batch and try again before
-       * emitting primitives.
-       *
-       * This can be a fixed number as we only have a limited number of
-       * objects referenced from the batchbuffer in a primitive emit,
-       * consisting of the vertex buffers, pipelined state pointers,
-       * the CURBE, the depth buffer, and a query BO.
-       */
-      drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
-      int validated_bo_count;
+   struct {
+      struct brw_state_flags dirty;
    } state;
 
    struct brw_cache cache;
@@ -485,23 +612,27 @@ struct brw_context
 
    struct {
       struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+      struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
+      struct {
+	      uint32_t handle;
+	      uint32_t offset;
+	      uint32_t stride;
+      } current_buffers[VERT_ATTRIB_MAX];
 
       struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
       GLuint nr_enabled;
-
-#define BRW_NR_UPLOAD_BUFS 17
-#define BRW_UPLOAD_INIT_SIZE (128*1024)
-
-      struct {
-	 drm_intel_bo *bo;
-	 GLuint offset;
-      } upload;
+      GLuint nr_buffers, nr_current_buffers;
 
       /* Summary of size and varying of active arrays, so we can check
        * for changes to this state:
        */
       struct brw_vertex_info info;
       unsigned int min_index, max_index;
+
+      /* Offset from start of vertex buffer so we can avoid redefining
+       * the same VB packet over and over again.
+       */
+      unsigned int start_vertex_bias;
    } vb;
 
    struct {
@@ -512,10 +643,10 @@ struct brw_context
        */
       const struct _mesa_index_buffer *ib;
 
-      /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+      /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
       drm_intel_bo *bo;
-      unsigned int offset;
-      unsigned int size;
+      GLuint type;
+
       /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
        * avoid re-uploading the IB packet over and over if we're actually
        * referencing the same index buffer.
@@ -528,17 +659,18 @@ struct brw_context
    const struct gl_vertex_program *vertex_program;
    const struct gl_fragment_program *fragment_program;
 
-
-   /* For populating the gtt:
-    */
-   GLuint next_free_page;
-
    /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
    uint32_t CMD_VF_STATISTICS;
    /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
    uint32_t CMD_PIPELINE_SELECT;
-   int vs_max_threads;
-   int wm_max_threads;
+
+   /**
+    * Platform specific constants containing the maximum number of threads
+    * for each pipeline stage.
+    */
+   int max_vs_threads;
+   int max_gs_threads;
+   int max_wm_threads;
 
    /* BRW_NEW_URB_ALLOCATIONS:
     */
@@ -547,7 +679,10 @@ struct brw_context
       GLuint csize;		/* constant buffer size in urb registers */
       GLuint sfsize;		/* setup data size in urb registers */
 
-      GLboolean constrained;
+      bool constrained;
+
+      GLuint max_vs_entries;	/* Maximum number of VS entries */
+      GLuint max_gs_entries;	/* Maximum number of GS entries */
 
       GLuint nr_vs_entries;
       GLuint nr_gs_entries;
@@ -555,12 +690,14 @@ struct brw_context
       GLuint nr_sf_entries;
       GLuint nr_cs_entries;
 
-      /* gen6 */
+      /* gen6:
+       * The length of each URB entry owned by the VS (or GS), as
+       * a number of 1024-bit (128-byte) rows.  Should be >= 1.
+       *
+       * gen7: Same meaning, but in 512-bit (64-byte) rows.
+       */
       GLuint vs_size;
-/*       GLuint gs_size; */
-/*       GLuint clip_size; */
-/*       GLuint sf_size; */
-/*       GLuint cs_size; */
+      GLuint gs_size;
 
       GLuint vs_start;
       GLuint gs_start;
@@ -603,45 +740,81 @@ struct brw_context
       GLuint last_bufsz;
    } curbe;
 
+   struct {
+      /** Binding table of pointers to surf_bo entries */
+      uint32_t bo_offset;
+      uint32_t surf_offset[BRW_MAX_SURFACES];
+   } bind;
+
+   /** SAMPLER_STATE count and offset */
+   struct {
+      GLuint count;
+      uint32_t offset;
+   } sampler;
+
    struct {
       struct brw_vs_prog_data *prog_data;
       int8_t *constant_map; /* variable array following prog_data */
 
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
+      drm_intel_bo *scratch_bo;
       drm_intel_bo *const_bo;
+      /** Offset in the program cache to the VS program */
+      uint32_t prog_offset;
+      uint32_t state_offset;
 
-      /** Binding table of pointers to surf_bo entries */
-      drm_intel_bo *bind_bo;
-      uint32_t bind_bo_offset;
-      drm_intel_bo *surf_bo[BRW_VS_MAX_SURF];
-      uint32_t surf_offset[BRW_VS_MAX_SURF];
-      GLuint nr_surfaces;      
+      uint32_t push_const_offset; /* Offset in the batchbuffer */
+      int push_const_size; /* in 256-bit register increments */
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /**
+       * Array of the ra classes for the unaligned contiguous register
+       * block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+       */
+      uint8_t *ra_reg_to_grf;
+      /** @} */
    } vs;
 
    struct {
       struct brw_gs_prog_data *prog_data;
 
-      GLboolean prog_active;
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
+      bool prog_active;
+      /** Offset in the program cache to the GS program */
+      uint32_t prog_offset;
+      uint32_t state_offset;
    } gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
 
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
-      drm_intel_bo *vp_bo;
+      /** Offset in the program cache to the CLIP program pre-gen6 */
+      uint32_t prog_offset;
+
+      /* Offset in the batch to the CLIP state on pre-gen6. */
+      uint32_t state_offset;
+
+      /* As of gen6, this is the offset in the batch to the CLIP VP,
+       * instead of vp_bo.
+       */
+      uint32_t vp_offset;
    } clip;
 
 
    struct {
       struct brw_sf_prog_data *prog_data;
 
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
-      drm_intel_bo *vp_bo;
+      /** Offset in the program cache to the SF program */
+      uint32_t prog_offset;
+      uint32_t state_offset;
+      uint32_t vp_offset;
    } sf;
 
    struct {
@@ -653,69 +826,125 @@ struct brw_context
        */
       GLbitfield input_size_masks[4];
 
-      /** Array of surface default colors (texture border color) */
-      drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+      /** offsets in the batch to sampler default colors (texture border color)
+       */
+      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
 
       GLuint render_surf;
-      GLuint nr_surfaces;      
 
-      GLuint max_threads;
       drm_intel_bo *scratch_bo;
 
-      GLuint sampler_count;
-      drm_intel_bo *sampler_bo;
+      /** Offset in the program cache to the WM program */
+      uint32_t prog_offset;
 
-      /** Binding table of pointers to surf_bo entries */
-      drm_intel_bo *bind_bo;
-      uint32_t bind_bo_offset;
-      drm_intel_bo *surf_bo[BRW_WM_MAX_SURF];
-      uint32_t surf_offset[BRW_WM_MAX_SURF];
+      uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
 
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
       drm_intel_bo *const_bo; /* pull constant buffer. */
       /**
-       *  This is the push constant BO on gen6.
+       * This is the offset in the batch to the push constants on gen6.
        *
        * Pre-gen6, push constants live in the CURBE.
        */
-      drm_intel_bo *push_const_bo;
-   } wm;
+      uint32_t push_const_offset;
 
+      /** @{ register allocator */
 
-   struct {
-      /* gen4 */
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *vp_bo;
+      struct ra_regs *regs;
+
+      /** Array of the ra classes for the unaligned contiguous
+       * register block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+       */
+      uint8_t *ra_reg_to_grf;
+
+      /**
+       * ra class for the aligned pairs we use for PLN, which doesn't
+       * appear in *classes.
+       */
+      int aligned_pairs_class;
+
+      /** @} */
+   } wm;
 
-      /* gen6 */
-      drm_intel_bo *blend_state_bo;
-      drm_intel_bo *depth_stencil_state_bo;
-      drm_intel_bo *color_calc_state_bo;
 
-      drm_intel_bo *state_bo;
+   struct {
       uint32_t state_offset;
+      uint32_t blend_state_offset;
+      uint32_t depth_stencil_state_offset;
+      uint32_t vp_offset;
    } cc;
 
    struct {
       struct brw_query_object *obj;
       drm_intel_bo *bo;
       int index;
-      GLboolean active;
+      bool active;
    } query;
    /* Used to give every program string a unique id
     */
    GLuint program_id;
+
+   int num_atoms;
+   const struct brw_tracked_state **atoms;
+
+   /* If (INTEL_DEBUG & DEBUG_BATCH) */
+   struct {
+      uint32_t offset;
+      uint32_t size;
+      enum state_struct_type type;
+   } *state_batch_list;
+   int state_batch_count;
+
+   /**
+    * \brief State needed to execute HiZ meta-ops
+    *
+    * All fields except \c op are initialized by gen6_hiz_init().
+    */
+   struct brw_hiz_state {
+      /**
+       * \brief Indicates which HiZ operation is in progress.
+       *
+       * See the following sections of the Sandy Bridge PRM, Volume 1, Part 2:
+       *   - 7.5.3.1 Depth Buffer Clear
+       *   - 7.5.3.2 Depth Buffer Resolve
+       *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+       */
+      enum brw_hiz_op {
+	 BRW_HIZ_OP_NONE = 0,
+	 BRW_HIZ_OP_DEPTH_CLEAR,
+	 BRW_HIZ_OP_DEPTH_RESOLVE,
+	 BRW_HIZ_OP_HIZ_RESOLVE,
+      } op;
+
+      /** \brief Shader state */
+      struct {
+	 GLuint program;
+	 GLuint position_vbo;
+	 GLint position_location;
+      } shader;
+
+      /** \brief VAO for the rectangle primitive's vertices. */
+      GLuint vao;
+
+      GLuint fbo;
+      struct gl_renderbuffer *depth_rb;
+   } hiz;
 };
 
 
+
 #define BRW_PACKCOLOR8888(r,g,b,a)  ((r<<24) | (g<<16) | (b<<8) | a)
 
 struct brw_instruction_info {
     char    *name;
     int	    nsrc;
     int	    ndst;
-    GLboolean is_arith;
+    bool is_arith;
 };
 extern const struct brw_instruction_info brw_opcodes[128];
 
@@ -727,10 +956,10 @@ void brwInitVtbl( struct brw_context *brw );
 /*======================================================================
  * brw_context.c
  */
-GLboolean brwCreateContext( int api,
-			    const struct gl_config *mesaVis,
-			    __DRIcontext *driContextPriv,
-			    void *sharedContextPrivate);
+bool brwCreateContext(int api,
+		      const struct gl_config *mesaVis,
+		      __DRIcontext *driContextPriv,
+		      void *sharedContextPrivate);
 
 /*======================================================================
  * brw_queryobj.c
@@ -756,14 +985,15 @@ void brw_validate_textures( struct brw_context *brw );
  */
 void brwInitFragProgFuncs( struct dd_function_table *functions );
 
+int brw_get_scratch_size(int size);
+void brw_get_scratch_bo(struct intel_context *intel,
+			drm_intel_bo **scratch_bo, int size);
+
 
 /* brw_urb.c
  */
 void brw_upload_urb_fence(struct brw_context *brw);
 
-/* brw_cc.c */
-void brw_update_cc_vp(struct brw_context *brw);
-
 /* brw_curbe.c
  */
 void brw_upload_cs_urb_state(struct brw_context *brw);
@@ -771,9 +1001,23 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
 /* brw_disasm.c */
 int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 
-/* brw_state.c */
-void brw_enable(struct gl_context * ctx, GLenum cap, GLboolean state);
-void brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval);
+/* brw_vs.c */
+void brw_compute_vue_map(struct brw_vue_map *vue_map,
+                         const struct intel_context *intel,
+                         bool userclip_active,
+                         GLbitfield64 outputs_written);
+gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx);
+
+/* brw_wm.c */
+unsigned
+brw_compute_barycentric_interp_modes(bool shade_model_flat,
+                                     const struct gl_fragment_program *fprog);
+
+/* gen6_clip_state.c */
+bool
+brw_fprog_uses_noperspective(const struct gl_fragment_program *fprog);
+
+
 
 /*======================================================================
  * Inline conversion functions.  These are better-typed than the
@@ -810,7 +1054,7 @@ brw_fragment_program_const(const struct gl_fragment_program *p)
 }
 
 static inline
-float convert_param(enum param_conversion conversion, float param)
+float convert_param(enum param_conversion conversion, const float *param)
 {
    union {
       float f;
@@ -820,25 +1064,57 @@ float convert_param(enum param_conversion conversion, float param)
 
    switch (conversion) {
    case PARAM_NO_CONVERT:
-      return param;
+      return *param;
    case PARAM_CONVERT_F2I:
-      fi.i = param;
+      fi.i = *param;
       return fi.f;
    case PARAM_CONVERT_F2U:
-      fi.u = param;
+      fi.u = *param;
       return fi.f;
    case PARAM_CONVERT_F2B:
-      if (param != 0.0)
+      if (*param != 0.0)
 	 fi.i = 1;
       else
 	 fi.i = 0;
       return fi.f;
+   case PARAM_CONVERT_ZERO:
+      return 0.0;
    default:
-      return param;
+      return *param;
+   }
+}
+
+/**
+ * Pre-gen6, the register file of the EUs was shared between threads,
+ * and each thread used some subset allocated on a 16-register block
+ * granularity.  The unit states wanted these block counts.
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+   return ALIGN(reg_count, 16) / 16 - 1;
+}
+
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+		  uint32_t prog_offset)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen >= 5) {
+      /* Using state base address. */
+      return prog_offset;
    }
+
+   drm_intel_bo_emit_reloc(intel->batch.bo,
+			   state_offset,
+			   brw->cache.bo,
+			   prog_offset,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return brw->cache.bo->offset + prog_offset;
 }
 
-GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
+bool brw_do_cubemap_normalize(struct exec_list *instructions);
 
 #endif
-