i965: Start adding the VS visitor and codegen.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.h
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index b3d297deae628387e424f82fcc2a12d403c1f4be..7b6b64c1a5c384eafac9872e6b1e8782d2227a0a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -132,16 +132,19 @@ enum brw_state_id {
     BRW_STATE_WM_INPUT_DIMENSIONS,
     BRW_STATE_PSP,
     BRW_STATE_WM_SURFACES,
-   BRW_STATE_BINDING_TABLE,
+   BRW_STATE_VS_BINDING_TABLE,
+   BRW_STATE_GS_BINDING_TABLE,
+   BRW_STATE_PS_BINDING_TABLE,
     BRW_STATE_INDICES,
     BRW_STATE_VERTICES,
     BRW_STATE_BATCH,
-   BRW_STATE_DEPTH_BUFFER,
     BRW_STATE_NR_WM_SURFACES,
     BRW_STATE_NR_VS_SURFACES,
     BRW_STATE_INDEX_BUFFER,
     BRW_STATE_VS_CONSTBUF,
-   BRW_STATE_WM_CONSTBUF
+   BRW_STATE_WM_CONSTBUF,
+   BRW_STATE_PROGRAM_CACHE,
+   BRW_STATE_STATE_BASE_ADDRESS,
  };
  
  #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
@@ -155,21 +158,24 @@ enum brw_state_id {
  #define BRW_NEW_WM_INPUT_DIMENSIONS     (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
  #define BRW_NEW_PSP                     (1 << BRW_STATE_PSP)
  #define BRW_NEW_WM_SURFACES            (1 << BRW_STATE_WM_SURFACES)
-#define BRW_NEW_BINDING_TABLE          (1 << BRW_STATE_BINDING_TABLE)
+#define BRW_NEW_VS_BINDING_TABLE       (1 << BRW_STATE_VS_BINDING_TABLE)
+#define BRW_NEW_GS_BINDING_TABLE       (1 << BRW_STATE_GS_BINDING_TABLE)
+#define BRW_NEW_PS_BINDING_TABLE       (1 << BRW_STATE_PS_BINDING_TABLE)
  #define BRW_NEW_INDICES                        (1 << BRW_STATE_INDICES)
  #define BRW_NEW_VERTICES               (1 << BRW_STATE_VERTICES)
  /**
   * Used for any batch entry with a relocated pointer that will be used
   * by any 3D rendering.
   */
-#define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
+#define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
  /** \see brw.state.depth_region */
-#define BRW_NEW_DEPTH_BUFFER           (1 << BRW_STATE_DEPTH_BUFFER)
-#define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
-#define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
-#define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
-#define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
-#define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
+#define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
+#define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
+#define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
+#define BRW_NEW_STATE_BASE_ADDRESS     (1 << BRW_STATE_STATE_BASE_ADDRESS)
  
  struct brw_state_flags {
     /** State update flags signalled by mesa internals */
@@ -182,6 +188,32 @@ struct brw_state_flags {
     GLuint cache;
  };
  
+enum state_struct_type { /* tags the kind of state struct; see state_batch_list.type */
+   AUB_TRACE_VS_STATE =                        1,
+   AUB_TRACE_GS_STATE =                        2,
+   AUB_TRACE_CLIP_STATE =              3,
+   AUB_TRACE_SF_STATE =                        4,
+   AUB_TRACE_WM_STATE =                        5,
+   AUB_TRACE_CC_STATE =                        6,
+   AUB_TRACE_CLIP_VP_STATE =           7,
+   AUB_TRACE_SF_VP_STATE =             8,
+   AUB_TRACE_CC_VP_STATE =             0x9,
+   AUB_TRACE_SAMPLER_STATE =           0xa,
+   AUB_TRACE_KERNEL_INSTRUCTIONS =     0xb,
+   AUB_TRACE_SCRATCH_SPACE =           0xc,
+   AUB_TRACE_SAMPLER_DEFAULT_COLOR =    0xd,
+
+   AUB_TRACE_SCISSOR_STATE =           0x15,
+   AUB_TRACE_BLEND_STATE =             0x16,
+   AUB_TRACE_DEPTH_STENCIL_STATE =     0x17,
+
+   /* Not written to .aub files the same way the structures above are. */
+   AUB_TRACE_NO_TYPE =                 0x100,
+   AUB_TRACE_BINDING_TABLE =           0x101,
+   AUB_TRACE_SURFACE_STATE =           0x102,
+   AUB_TRACE_VS_CONSTANTS =            0x103,
+   AUB_TRACE_WM_CONSTANTS =            0x104,
+};
  
  /** Subclass of Mesa vertex program */
  struct brw_vertex_program {
@@ -229,8 +261,8 @@ struct brw_wm_prog_data {
  
     GLuint first_curbe_grf;
     GLuint first_curbe_grf_16;
-   GLuint total_grf;
-   GLuint total_grf_16;
+   GLuint reg_blocks;
+   GLuint reg_blocks_16;
     GLuint total_scratch;
  
     GLuint nr_params;       /**< number of float params/constants */
@@ -361,9 +393,11 @@ struct brw_cache_item {
     /** 32-bit hash of the key data */
     GLuint hash;
     GLuint key_size;            /* for variable-sized keys */
+   GLuint aux_size;
     const void *key;
  
-   drm_intel_bo *bo;
+   uint32_t offset;
+   uint32_t size;
  
     struct brw_cache_item *next;
  };   
@@ -374,14 +408,11 @@ struct brw_cache {
     struct brw_context *brw;
  
     struct brw_cache_item **items;
+   drm_intel_bo *bo;
     GLuint size, n_items;
  
-   char *name[BRW_MAX_CACHE];
-
-   /* Record of the last BOs chosen for each cache_id.  Used to set
-    * brw->state.dirty.cache when a new cache item is chosen.
-    */
-   drm_intel_bo *last_bo[BRW_MAX_CACHE];
+   uint32_t next_offset;
+   bool bo_used_by_gpu;
  };
  
  
@@ -486,28 +517,6 @@ struct brw_context
  
     struct {
        struct brw_state_flags dirty;
-
-      /**
-       * \name Cached region pointers
-       *
-       * When the draw buffer is updated, often the depth buffer is not
-       * changed. Caching the pointer to the buffer's region allows us to
-       * detect when the buffer has in fact changed, and allows us to avoid
-       * updating the buffer's GPU state when it has not.
-       *
-       * The original of each cached pointer is an instance of
-       * \c intel_renderbuffer.region.
-       *
-       * \see brw_set_draw_region()
-       *
-       * \{
-       */
-
-      /** \see struct brw_tracked_state brw_depthbuffer */
-      struct intel_region *depth_region;
-
-      /** \} */
-
        /**
         * List of buffers accumulated in brw_validate_state to receive
         * drm_intel_bo_check_aperture treatment before exec, so we can
@@ -520,7 +529,7 @@ struct brw_context
         * the CURBE, the depth buffer, and a query BO.
         */
        drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
-      int validated_bo_count;
+      unsigned int validated_bo_count;
     } state;
  
     struct brw_cache cache;
@@ -654,8 +663,9 @@ struct brw_context
        struct brw_vs_prog_data *prog_data;
        int8_t *constant_map; /* variable array following prog_data */
  
-      drm_intel_bo *prog_bo;
        drm_intel_bo *const_bo;
+      /** Offset in the program cache to the VS program */
+      uint32_t prog_offset;
        uint32_t state_offset;
  
        /** Binding table of pointers to surf_bo entries */
@@ -671,14 +681,16 @@ struct brw_context
        struct brw_gs_prog_data *prog_data;
  
        GLboolean prog_active;
+      /** Offset in the program cache to the GS program */
+      uint32_t prog_offset;
        uint32_t state_offset;
-      drm_intel_bo *prog_bo;
     } gs;
  
     struct {
        struct brw_clip_prog_data *prog_data;
  
-      drm_intel_bo *prog_bo;
+      /** Offset in the program cache to the CLIP program pre-gen6 */
+      uint32_t prog_offset;
  
        /* Offset in the batch to the CLIP state on pre-gen6. */
        uint32_t state_offset;
@@ -693,7 +705,8 @@ struct brw_context
     struct {
        struct brw_sf_prog_data *prog_data;
  
-      drm_intel_bo *prog_bo;
+      /** Offset in the program cache to the SF program */
+      uint32_t prog_offset;
        uint32_t state_offset;
        uint32_t vp_offset;
     } sf;
@@ -720,12 +733,14 @@ struct brw_context
        GLuint sampler_count;
        uint32_t sampler_offset;
  
+      /** Offset in the program cache to the WM program */
+      uint32_t prog_offset;
+
        /** Binding table of pointers to surf_bo entries */
        uint32_t bind_bo_offset;
        uint32_t surf_offset[BRW_WM_MAX_SURF];
        uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
  
-      drm_intel_bo *prog_bo;
        drm_intel_bo *const_bo; /* pull constant buffer. */
        /**
         * This is offset in the batch to the push constants on gen6.
@@ -733,13 +748,33 @@ struct brw_context
         * Pre-gen6, push constants live in the CURBE.
         */
        uint32_t push_const_offset;
+
+      /** @{ register allocator */
+
+      struct ra_regs *regs;
+
+      /** Array of the ra classes for the unaligned contiguous
+       * register block sizes used.
+       */
+      int *classes;
+
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+       */
+      uint8_t *ra_reg_to_grf;
+
+      /**
+       * ra class for the aligned pairs we use for PLN, which doesn't
+       * appear in *classes.
+       */
+      int aligned_pairs_class;
+
+      /** @} */
     } wm;
  
  
     struct {
-      /* gen4 */
-      drm_intel_bo *prog_bo;
-
        uint32_t state_offset;
        uint32_t blend_state_offset;
        uint32_t depth_stencil_state_offset;
@@ -758,6 +793,14 @@ struct brw_context
  
     int num_prepare_atoms, num_emit_atoms;
     struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
+
+   /* If (INTEL_DEBUG & DEBUG_BATCH) */
+   struct {
+      uint32_t offset;
+      uint32_t size;
+      enum state_struct_type type;
+   } *state_batch_list;
+   int state_batch_count;
  };
  
  
@@ -883,6 +926,37 @@ float convert_param(enum param_conversion conversion, float param)
     }
  }
  
+/**
+ * Pre-gen6, the register file of the EUs was shared between threads, and
+ * each thread used some subset allocated in 16-register blocks.  The unit
+ * states wanted these block counts; returns count - 1 (given ALIGN rounds up).
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+   return ALIGN(reg_count, 16) / 16 - 1;
+}
+
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+                 uint32_t prog_offset)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen >= 5) {
+      /* Using state base address: return the bare cache offset, no reloc. */
+      return prog_offset;
+   }
+   /* gen < 5: emit a batch reloc at state_offset targeting the cache BO. */
+   drm_intel_bo_emit_reloc(intel->batch.bo,
+                          state_offset,
+                          brw->cache.bo,
+                          prog_offset,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return brw->cache.bo->offset + prog_offset;
+}
+
  GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
  
  #endif