i965: Start adding the VS visitor and codegen.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.h
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h

index 4fbb554a39dc840d04167d26db89c68bc3c337e7..7b6b64c1a5c384eafac9872e6b1e8782d2227a0a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -120,32 +120,62 @@
  
  struct brw_context;
  
-#define BRW_NEW_URB_FENCE               0x1
-#define BRW_NEW_FRAGMENT_PROGRAM        0x2
-#define BRW_NEW_VERTEX_PROGRAM          0x4
-#define BRW_NEW_INPUT_DIMENSIONS        0x8
-#define BRW_NEW_CURBE_OFFSETS           0x10
-#define BRW_NEW_REDUCED_PRIMITIVE       0x20
-#define BRW_NEW_PRIMITIVE               0x40
-#define BRW_NEW_CONTEXT                 0x80
-#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
-#define BRW_NEW_PSP                     0x800
-#define BRW_NEW_WM_SURFACES            0x1000
-#define BRW_NEW_BINDING_TABLE          0x2000
-#define BRW_NEW_INDICES                        0x4000
-#define BRW_NEW_VERTICES               0x8000
+enum brw_state_id {
+   BRW_STATE_URB_FENCE,
+   BRW_STATE_FRAGMENT_PROGRAM,
+   BRW_STATE_VERTEX_PROGRAM,
+   BRW_STATE_INPUT_DIMENSIONS,
+   BRW_STATE_CURBE_OFFSETS,
+   BRW_STATE_REDUCED_PRIMITIVE,
+   BRW_STATE_PRIMITIVE,
+   BRW_STATE_CONTEXT,
+   BRW_STATE_WM_INPUT_DIMENSIONS,
+   BRW_STATE_PSP,
+   BRW_STATE_WM_SURFACES,
+   BRW_STATE_VS_BINDING_TABLE,
+   BRW_STATE_GS_BINDING_TABLE,
+   BRW_STATE_PS_BINDING_TABLE,
+   BRW_STATE_INDICES,
+   BRW_STATE_VERTICES,
+   BRW_STATE_BATCH,
+   BRW_STATE_NR_WM_SURFACES,
+   BRW_STATE_NR_VS_SURFACES,
+   BRW_STATE_INDEX_BUFFER,
+   BRW_STATE_VS_CONSTBUF,
+   BRW_STATE_WM_CONSTBUF,
+   BRW_STATE_PROGRAM_CACHE,
+   BRW_STATE_STATE_BASE_ADDRESS,
+};
+
+#define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
+#define BRW_NEW_FRAGMENT_PROGRAM        (1 << BRW_STATE_FRAGMENT_PROGRAM)
+#define BRW_NEW_VERTEX_PROGRAM          (1 << BRW_STATE_VERTEX_PROGRAM)
+#define BRW_NEW_INPUT_DIMENSIONS        (1 << BRW_STATE_INPUT_DIMENSIONS)
+#define BRW_NEW_CURBE_OFFSETS           (1 << BRW_STATE_CURBE_OFFSETS)
+#define BRW_NEW_REDUCED_PRIMITIVE       (1 << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PRIMITIVE               (1 << BRW_STATE_PRIMITIVE)
+#define BRW_NEW_CONTEXT                 (1 << BRW_STATE_CONTEXT)
+#define BRW_NEW_WM_INPUT_DIMENSIONS     (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
+#define BRW_NEW_PSP                     (1 << BRW_STATE_PSP)
+#define BRW_NEW_WM_SURFACES            (1 << BRW_STATE_WM_SURFACES)
+#define BRW_NEW_VS_BINDING_TABLE       (1 << BRW_STATE_VS_BINDING_TABLE)
+#define BRW_NEW_GS_BINDING_TABLE       (1 << BRW_STATE_GS_BINDING_TABLE)
+#define BRW_NEW_PS_BINDING_TABLE       (1 << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_INDICES                        (1 << BRW_STATE_INDICES)
+#define BRW_NEW_VERTICES               (1 << BRW_STATE_VERTICES)
  /**
   * Used for any batch entry with a relocated pointer that will be used
   * by any 3D rendering.
   */
-#define BRW_NEW_BATCH                  0x10000
-/** brw->depth_region updated */
-#define BRW_NEW_DEPTH_BUFFER           0x20000
-#define BRW_NEW_NR_WM_SURFACES         0x40000
-#define BRW_NEW_NR_VS_SURFACES         0x80000
-#define BRW_NEW_INDEX_BUFFER           0x100000
-#define BRW_NEW_VS_CONSTBUF            0x200000
-#define BRW_NEW_WM_CONSTBUF            0x400000
+#define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
+/* Flags for the remaining brw_state_id entries. */
+#define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
+#define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
+#define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
+#define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
+#define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
+#define BRW_NEW_STATE_BASE_ADDRESS     (1 << BRW_STATE_STATE_BASE_ADDRESS)
  
  struct brw_state_flags {
     /** State update flags signalled by mesa internals */
@@ -158,6 +188,32 @@ struct brw_state_flags {
     GLuint cache;
  };
  
+enum state_struct_type {
+   AUB_TRACE_VS_STATE =                        1,
+   AUB_TRACE_GS_STATE =                        2,
+   AUB_TRACE_CLIP_STATE =              3,
+   AUB_TRACE_SF_STATE =                        4,
+   AUB_TRACE_WM_STATE =                        5,
+   AUB_TRACE_CC_STATE =                        6,
+   AUB_TRACE_CLIP_VP_STATE =           7,
+   AUB_TRACE_SF_VP_STATE =             8,
+   AUB_TRACE_CC_VP_STATE =             0x9,
+   AUB_TRACE_SAMPLER_STATE =           0xa,
+   AUB_TRACE_KERNEL_INSTRUCTIONS =     0xb,
+   AUB_TRACE_SCRATCH_SPACE =           0xc,
+   AUB_TRACE_SAMPLER_DEFAULT_COLOR =    0xd,
+
+   AUB_TRACE_SCISSOR_STATE =           0x15,
+   AUB_TRACE_BLEND_STATE =             0x16,
+   AUB_TRACE_DEPTH_STENCIL_STATE =     0x17,
+
+   /* Not written to .aub files the same way the structures above are. */
+   AUB_TRACE_NO_TYPE =                 0x100,
+   AUB_TRACE_BINDING_TABLE =           0x101,
+   AUB_TRACE_SURFACE_STATE =           0x102,
+   AUB_TRACE_VS_CONSTANTS =            0x103,
+   AUB_TRACE_WM_CONSTANTS =            0x104,
+};
  
  /** Subclass of Mesa vertex program */
  struct brw_vertex_program {
@@ -204,13 +260,16 @@ struct brw_wm_prog_data {
     GLuint urb_read_length;
  
     GLuint first_curbe_grf;
-   GLuint total_grf;
+   GLuint first_curbe_grf_16;
+   GLuint reg_blocks;
+   GLuint reg_blocks_16;
     GLuint total_scratch;
  
     GLuint nr_params;       /**< number of float params/constants */
     GLuint nr_pull_params;
     GLboolean error;
     int dispatch_width;
+   uint32_t prog_offset_16;
  
     /* Pointer to tracked values (only valid once
      * _mesa_load_state_parameters has been called at runtime).
@@ -308,7 +367,6 @@ enum brw_cache_id {
     BRW_CC_VP,
     BRW_CC_UNIT,
     BRW_WM_PROG,
-   BRW_SAMPLER_DEFAULT_COLOR,
     BRW_SAMPLER,
     BRW_WM_UNIT,
     BRW_SF_PROG,
@@ -335,11 +393,11 @@ struct brw_cache_item {
     /** 32-bit hash of the key data */
     GLuint hash;
     GLuint key_size;            /* for variable-sized keys */
+   GLuint aux_size;
     const void *key;
-   drm_intel_bo **reloc_bufs;
-   GLuint nr_reloc_bufs;
  
-   drm_intel_bo *bo;
+   uint32_t offset;
+   uint32_t size;
  
     struct brw_cache_item *next;
  };   
@@ -350,14 +408,11 @@ struct brw_cache {
     struct brw_context *brw;
  
     struct brw_cache_item **items;
+   drm_intel_bo *bo;
     GLuint size, n_items;
  
-   char *name[BRW_MAX_CACHE];
-
-   /* Record of the last BOs chosen for each cache_id.  Used to set
-    * brw->state.dirty.cache when a new cache item is chosen.
-    */
-   drm_intel_bo *last_bo[BRW_MAX_CACHE];
+   uint32_t next_offset;
+   bool bo_used_by_gpu;
  };
  
  
@@ -381,7 +436,6 @@ struct brw_tracked_state {
  #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
  #define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
  #define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
-#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR)
  #define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
  #define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
  #define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
@@ -408,21 +462,24 @@ struct brw_cached_batch_item {
   */
  #define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
  
+struct brw_vertex_buffer {
+   /** Buffer object containing the uploaded vertex data */
+   drm_intel_bo *bo;
+   uint32_t offset;
+   /** Byte stride between elements in the uploaded array */
+   GLuint stride;
+};
  struct brw_vertex_element {
     const struct gl_client_array *glarray;
  
+   int buffer;
+
     /** The corresponding Mesa vertex attribute */
     gl_vert_attrib attrib;
     /** Size of a complete element */
     GLuint element_size;
-   /** Number of uploaded elements for this input. */
-   GLuint count;
-   /** Byte stride between elements in the uploaded array */
-   GLuint stride;
     /** Offset of the first element within the buffer object */
     unsigned int offset;
-   /** Buffer object containing the uploaded vertex data */
-   drm_intel_bo *bo;
  };
  
  
@@ -457,12 +514,9 @@ struct brw_context
     GLboolean has_negative_rhw_bug;
     GLboolean has_aa_line_parameters;
     GLboolean has_pln;
-;
+
     struct {
        struct brw_state_flags dirty;
-
-      struct intel_region *depth_region;
-
        /**
         * List of buffers accumulated in brw_validate_state to receive
         * drm_intel_bo_check_aperture treatment before exec, so we can
@@ -475,7 +529,7 @@ struct brw_context
         * the CURBE, the depth buffer, and a query BO.
         */
        drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
-      int validated_bo_count;
+      unsigned int validated_bo_count;
     } state;
  
     struct brw_cache cache;
@@ -483,15 +537,27 @@ struct brw_context
  
     struct {
        struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+      struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
+      struct {
+             uint32_t handle;
+             uint32_t offset;
+             uint32_t stride;
+      } current_buffers[VERT_ATTRIB_MAX];
  
        struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
        GLuint nr_enabled;
+      GLuint nr_buffers, nr_current_buffers;
  
        /* Summary of size and varying of active arrays, so we can check
         * for changes to this state:
         */
        struct brw_vertex_info info;
        unsigned int min_index, max_index;
+
+      /* Offset from start of vertex buffer so we can avoid redefining
+       * the same VB packet over and over again.
+       */
+      unsigned int start_vertex_bias;
     } vb;
  
     struct {
@@ -502,10 +568,10 @@ struct brw_context
         */
        const struct _mesa_index_buffer *ib;
  
-      /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+      /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
        drm_intel_bo *bo;
-      unsigned int offset;
-      unsigned int size;
+      GLuint type;
+
        /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
         * avoid re-uploading the IB packet over and over if we're actually
         * referencing the same index buffer.
@@ -518,11 +584,6 @@ struct brw_context
     const struct gl_vertex_program *vertex_program;
     const struct gl_fragment_program *fragment_program;
  
-
-   /* For populating the gtt:
-    */
-   GLuint next_free_page;
-
     /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
     uint32_t CMD_VF_STATISTICS;
     /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
@@ -539,18 +600,23 @@ struct brw_context
  
        GLboolean constrained;
  
+      GLuint max_vs_entries;   /* Maximum number of VS entries */
+      GLuint max_gs_entries;   /* Maximum number of GS entries */
+
        GLuint nr_vs_entries;
        GLuint nr_gs_entries;
        GLuint nr_clip_entries;
        GLuint nr_sf_entries;
        GLuint nr_cs_entries;
  
-      /* gen6 */
+      /* gen6:
+       * The length of each URB entry owned by the VS (or GS), as
+       * a number of 1024-bit (128-byte) rows.  Should be >= 1.
+       *
+       * gen7: Same meaning, but in 512-bit (64-byte) rows.
+       */
        GLuint vs_size;
-/*       GLuint gs_size; */
-/*       GLuint clip_size; */
-/*       GLuint sf_size; */
-/*       GLuint cs_size; */
+      GLuint gs_size;
  
        GLuint vs_start;
        GLuint gs_start;
@@ -597,41 +663,52 @@ struct brw_context
        struct brw_vs_prog_data *prog_data;
        int8_t *constant_map; /* variable array following prog_data */
  
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
        drm_intel_bo *const_bo;
+      /** Offset in the program cache to the VS program */
+      uint32_t prog_offset;
+      uint32_t state_offset;
  
        /** Binding table of pointers to surf_bo entries */
-      drm_intel_bo *bind_bo;
        uint32_t bind_bo_offset;
-      drm_intel_bo *surf_bo[BRW_VS_MAX_SURF];
        uint32_t surf_offset[BRW_VS_MAX_SURF];
        GLuint nr_surfaces;      
+
+      uint32_t push_const_offset; /* Offset in the batchbuffer */
+      int push_const_size; /* in 256-bit register increments */
     } vs;
  
     struct {
        struct brw_gs_prog_data *prog_data;
  
        GLboolean prog_active;
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
+      /** Offset in the program cache to the GS program */
+      uint32_t prog_offset;
+      uint32_t state_offset;
     } gs;
  
     struct {
        struct brw_clip_prog_data *prog_data;
  
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
-      drm_intel_bo *vp_bo;
+      /** Offset in the program cache to the CLIP program pre-gen6 */
+      uint32_t prog_offset;
+
+      /* Offset in the batch to the CLIP state on pre-gen6. */
+      uint32_t state_offset;
+
+      /* As of gen6, this is the offset in the batch to the CLIP VP,
+       * instead of vp_bo.
+       */
+      uint32_t vp_offset;
     } clip;
  
  
     struct {
        struct brw_sf_prog_data *prog_data;
  
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
-      drm_intel_bo *vp_bo;
+      /** Offset in the program cache to the SF program pre-gen6 */
+      uint32_t prog_offset;
+      uint32_t state_offset;
+      uint32_t vp_offset;
     } sf;
  
     struct {
@@ -643,8 +720,9 @@ struct brw_context
         */
        GLbitfield input_size_masks[4];
  
-      /** Array of surface default colors (texture border color) */
-      drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+      /** offsets in the batch to sampler default colors (texture border color)
+       */
+      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
  
        GLuint render_surf;
        GLuint nr_surfaces;      
@@ -653,38 +731,54 @@ struct brw_context
        drm_intel_bo *scratch_bo;
  
        GLuint sampler_count;
-      drm_intel_bo *sampler_bo;
+      uint32_t sampler_offset;
+
+      /** Offset in the program cache to the WM program */
+      uint32_t prog_offset;
  
        /** Binding table of pointers to surf_bo entries */
-      drm_intel_bo *bind_bo;
        uint32_t bind_bo_offset;
-      drm_intel_bo *surf_bo[BRW_WM_MAX_SURF];
        uint32_t surf_offset[BRW_WM_MAX_SURF];
+      uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
  
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *state_bo;
        drm_intel_bo *const_bo; /* pull constant buffer. */
        /**
-       *  This is the push constant BO on gen6.
+       * This is the offset in the batch to the push constants on gen6.
         *
         * Pre-gen6, push constants live in the CURBE.
         */
-      drm_intel_bo *push_const_bo;
-   } wm;
+      uint32_t push_const_offset;
  
+      /** @{ register allocator */
  
-   struct {
-      /* gen4 */
-      drm_intel_bo *prog_bo;
-      drm_intel_bo *vp_bo;
+      struct ra_regs *regs;
+
+      /** Array of the ra classes for the unaligned contiguous
+       * register block sizes used.
+       */
+      int *classes;
  
-      /* gen6 */
-      drm_intel_bo *blend_state_bo;
-      drm_intel_bo *depth_stencil_state_bo;
-      drm_intel_bo *color_calc_state_bo;
+      /**
+       * Mapping for register-allocated objects in *regs to the first
+       * GRF for that object.
+      */
+      uint8_t *ra_reg_to_grf;
  
-      drm_intel_bo *state_bo;
+      /**
+       * ra class for the aligned pairs we use for PLN, which doesn't
+       * appear in *classes.
+       */
+      int aligned_pairs_class;
+
+      /** @} */
+   } wm;
+
+
+   struct {
        uint32_t state_offset;
+      uint32_t blend_state_offset;
+      uint32_t depth_stencil_state_offset;
+      uint32_t vp_offset;
     } cc;
  
     struct {
@@ -696,6 +790,17 @@ struct brw_context
     /* Used to give every program string a unique id
      */
     GLuint program_id;
+
+   int num_prepare_atoms, num_emit_atoms;
+   struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
+
+   /* If (INTEL_DEBUG & DEBUG_BATCH) */
+   struct {
+      uint32_t offset;
+      uint32_t size;
+      enum state_struct_type type;
+   } *state_batch_list;
+   int state_batch_count;
  };
  
  
@@ -751,9 +856,6 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
   */
  void brw_upload_urb_fence(struct brw_context *brw);
  
-/* brw_cc.c */
-void brw_update_cc_vp(struct brw_context *brw);
-
  /* brw_curbe.c
   */
  void brw_upload_cs_urb_state(struct brw_context *brw);
@@ -761,10 +863,6 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
  /* brw_disasm.c */
  int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
  
-/* brw_state.c */
-void brw_enable(struct gl_context * ctx, GLenum cap, GLboolean state);
-void brw_depth_range(struct gl_context *ctx, GLclampd nearval, GLclampd farval);
-
  /*======================================================================
   * Inline conversion functions.  These are better-typed than the
   * macros used previously:
@@ -828,7 +926,37 @@ float convert_param(enum param_conversion conversion, float param)
     }
  }
  
+/**
+ * Pre-gen6, the register file of the EUs was shared between threads, and
+ * each thread used a subset allocated at 16-register block granularity.
+ * The unit states wanted these counts: ALIGN(reg_count, 16) / 16 - 1.
+ */
+static inline int
+brw_register_blocks(int reg_count)
+{
+   return ALIGN(reg_count, 16) / 16 - 1;
+}
+
+static inline uint32_t
+brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
+                 uint32_t prog_offset)
+{
+   struct intel_context *intel = &brw->intel;
+   /* Returns the program pointer for the dword at state_offset in the batch. */
+   if (intel->gen >= 5) {
+      /* Using state base address: prog_offset is returned unchanged. */
+      return prog_offset;
+   }
+   /* Otherwise relocate against the program cache BO (brw->cache.bo). */
+   drm_intel_bo_emit_reloc(intel->batch.bo,
+                          state_offset,
+                          brw->cache.bo,
+                          prog_offset,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return brw->cache.bo->offset + prog_offset;
+}
+
  GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
  
  #endif
-