i965/gen7.5: Fix lower bound on number of VS URB entries.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.h
index dae3219a09a5f689e92c19bca1108ded643a3511..57f086bdb5c99693d0579e8e56441a311f167455 100644 (file)
@@ -125,12 +125,14 @@ extern "C" {
 struct brw_context;
 struct brw_instruction;
 struct brw_vs_prog_key;
+struct brw_vec4_prog_key;
 struct brw_wm_prog_key;
 struct brw_wm_prog_data;
 
 enum brw_state_id {
    BRW_STATE_URB_FENCE,
    BRW_STATE_FRAGMENT_PROGRAM,
+   BRW_STATE_GEOMETRY_PROGRAM,
    BRW_STATE_VERTEX_PROGRAM,
    BRW_STATE_CURBE_OFFSETS,
    BRW_STATE_REDUCED_PRIMITIVE,
@@ -146,8 +148,10 @@ enum brw_state_id {
    BRW_STATE_BATCH,
    BRW_STATE_INDEX_BUFFER,
    BRW_STATE_VS_CONSTBUF,
+   BRW_STATE_GS_CONSTBUF,
    BRW_STATE_PROGRAM_CACHE,
    BRW_STATE_STATE_BASE_ADDRESS,
+   BRW_STATE_VUE_MAP_VS,
    BRW_STATE_VUE_MAP_GEOM_OUT,
    BRW_STATE_TRANSFORM_FEEDBACK,
    BRW_STATE_RASTERIZER_DISCARD,
@@ -155,11 +159,13 @@ enum brw_state_id {
    BRW_STATE_UNIFORM_BUFFER,
    BRW_STATE_META_IN_PROGRESS,
    BRW_STATE_INTERPOLATION_MAP,
+   BRW_STATE_PUSH_CONSTANT_ALLOCATION,
    BRW_NUM_STATE_BITS
 };
 
 #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
 #define BRW_NEW_FRAGMENT_PROGRAM        (1 << BRW_STATE_FRAGMENT_PROGRAM)
+#define BRW_NEW_GEOMETRY_PROGRAM        (1 << BRW_STATE_GEOMETRY_PROGRAM)
 #define BRW_NEW_VERTEX_PROGRAM          (1 << BRW_STATE_VERTEX_PROGRAM)
 #define BRW_NEW_CURBE_OFFSETS           (1 << BRW_STATE_CURBE_OFFSETS)
 #define BRW_NEW_REDUCED_PRIMITIVE       (1 << BRW_STATE_REDUCED_PRIMITIVE)
@@ -180,8 +186,10 @@ enum brw_state_id {
 /** \see brw.state.depth_region */
 #define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_GS_CONSTBUF            (1 << BRW_STATE_GS_CONSTBUF)
 #define BRW_NEW_PROGRAM_CACHE          (1 << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS     (1 << BRW_STATE_STATE_BASE_ADDRESS)
+#define BRW_NEW_VUE_MAP_VS             (1 << BRW_STATE_VUE_MAP_VS)
 #define BRW_NEW_VUE_MAP_GEOM_OUT       (1 << BRW_STATE_VUE_MAP_GEOM_OUT)
 #define BRW_NEW_TRANSFORM_FEEDBACK     (1 << BRW_STATE_TRANSFORM_FEEDBACK)
 #define BRW_NEW_RASTERIZER_DISCARD     (1 << BRW_STATE_RASTERIZER_DISCARD)
@@ -189,6 +197,7 @@ enum brw_state_id {
 #define BRW_NEW_UNIFORM_BUFFER          (1 << BRW_STATE_UNIFORM_BUFFER)
 #define BRW_NEW_META_IN_PROGRESS        (1 << BRW_STATE_META_IN_PROGRESS)
 #define BRW_NEW_INTERPOLATION_MAP       (1 << BRW_STATE_INTERPOLATION_MAP)
+#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1 << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
 
 struct brw_state_flags {
    /** State update flags signalled by mesa internals */
@@ -273,6 +282,13 @@ struct brw_vertex_program {
 };
 
 
+/** Subclass of Mesa geometry program */
+struct brw_geometry_program {
+   struct gl_geometry_program program;
+   unsigned id;  /**< serial no. to identify geom progs, never re-used */
+};
+
+
 /** Subclass of Mesa fragment program */
 struct brw_fragment_program {
    struct gl_fragment_program program;
@@ -462,7 +478,7 @@ struct brw_clip_prog_data {
    GLuint total_grf;
 };
 
-struct brw_gs_prog_data {
+struct brw_ff_gs_prog_data {
    GLuint urb_read_length;
    GLuint total_grf;
 
@@ -480,6 +496,12 @@ struct brw_gs_prog_data {
 struct brw_vec4_prog_data {
    struct brw_vue_map vue_map;
 
+   /**
+    * Register where the thread expects to find input data from the URB
+    * (typically uniforms, followed by per-vertex inputs).
+    */
+   unsigned dispatch_grf_start_reg;
+
    GLuint curb_read_length;
    GLuint urb_read_length;
    GLuint total_grf;
@@ -512,6 +534,22 @@ struct brw_vs_prog_data {
    bool uses_vertexid;
 };
 
+
+/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
+ * this struct!
+ */
+struct brw_gs_prog_data
+{
+   struct brw_vec4_prog_data base;
+
+   /**
+    * Size of an output vertex, measured in HWORDS (32 bytes).
+    */
+   unsigned output_vertex_size_hwords;
+
+   unsigned output_topology;
+};
+
 /** Number of texture sampler units */
 #define BRW_MAX_TEX_UNIT 16
 
@@ -581,10 +619,10 @@ struct brw_vs_prog_data {
  *    |  36 | UBO 11                  |
  *    +-------------------------------+
  *
- * Our VS binding tables are programmed as follows:
+ * Our VS (and Gen7 GS) binding tables are programmed as follows:
  *
  *    +-----+-------------------------+
- *    |   0 | VS Pull Constant Buffer |
+ *    |   0 | Pull Constant Buffer    |
  *    +-----+-------------------------+
  *    |   1 | Texture 0               |
  *    |   . |     .                   |
@@ -614,14 +652,14 @@ struct brw_vs_prog_data {
 /** Maximum size of the binding table. */
 #define BRW_MAX_WM_SURFACES          (SURF_INDEX_WM_SHADER_TIME + 1)
 
-#define SURF_INDEX_VERT_CONST_BUFFER (0)
-#define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
-#define SURF_INDEX_VS_UBO(u)         (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u)
-#define SURF_INDEX_VS_SHADER_TIME    (SURF_INDEX_VS_UBO(12))
-#define BRW_MAX_VS_SURFACES          (SURF_INDEX_VS_SHADER_TIME + 1)
+#define SURF_INDEX_VEC4_CONST_BUFFER (0)
+#define SURF_INDEX_VEC4_TEXTURE(t)   (SURF_INDEX_VEC4_CONST_BUFFER + 1 + (t))
+#define SURF_INDEX_VEC4_UBO(u)       (SURF_INDEX_VEC4_TEXTURE(BRW_MAX_TEX_UNIT) + u)
+#define SURF_INDEX_VEC4_SHADER_TIME  (SURF_INDEX_VEC4_UBO(12))
+#define BRW_MAX_VEC4_SURFACES        (SURF_INDEX_VEC4_SHADER_TIME + 1)
 
-#define SURF_INDEX_SOL_BINDING(t)    ((t))
-#define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
+#define BRW_MAX_GEN6_GS_SURFACES       SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
 
 /**
  * Stride in bytes between shader_time entries.
@@ -644,7 +682,8 @@ enum brw_cache_id {
    BRW_SF_UNIT, /* scissor state on gen6 */
    BRW_VS_UNIT,
    BRW_VS_PROG,
-   BRW_GS_UNIT,
+   BRW_FF_GS_UNIT,
+   BRW_FF_GS_PROG,
    BRW_GS_PROG,
    BRW_CLIP_VP,
    BRW_CLIP_UNIT,
@@ -734,7 +773,8 @@ enum shader_time_shader_type {
 #define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
 #define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
 #define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
-#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
+#define CACHE_NEW_FF_GS_UNIT             (1<<BRW_FF_GS_UNIT)
+#define CACHE_NEW_FF_GS_PROG             (1<<BRW_FF_GS_PROG)
 #define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
 #define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
 #define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
@@ -746,13 +786,6 @@ struct brw_cached_batch_item {
    struct brw_cached_batch_item *next;
 };
    
-
-
-/* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
- * be easier if C allowed arrays of packed elements?
- */
-#define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
-
 struct brw_vertex_buffer {
    /** Buffer object containing the uploaded vertex data */
    drm_intel_bo *bo;
@@ -783,6 +816,32 @@ struct brw_query_object {
 };
 
 
+/**
+ * Data shared between brw_context::vs and brw_context::gs
+ */
+struct brw_stage_state
+{
+   drm_intel_bo *scratch_bo;
+   drm_intel_bo *const_bo;
+   /** Offset in the program cache to the program */
+   uint32_t prog_offset;
+   uint32_t state_offset;
+
+   uint32_t push_const_offset; /* Offset in the batchbuffer */
+   int push_const_size; /* in 256-bit register increments */
+
+   uint32_t bind_bo_offset;
+   uint32_t surf_offset[BRW_MAX_VEC4_SURFACES];
+
+   /** SAMPLER_STATE count and table offset */
+   uint32_t sampler_count;
+   uint32_t sampler_offset;
+
+   /** Offsets in the batch to sampler default colors (texture border color) */
+   uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
+};
+
+
 /**
  * brw_context is derived from gl_context.
  */
@@ -798,8 +857,7 @@ struct brw_context
 
       void (*update_texture_surface)(struct gl_context *ctx,
                                      unsigned unit,
-                                     uint32_t *binding_table,
-                                     unsigned surf_index);
+                                     uint32_t *surf_offset);
       void (*update_renderbuffer_surface)(struct brw_context *brw,
                                          struct gl_renderbuffer *rb,
                                          bool layered,
@@ -990,6 +1048,7 @@ struct brw_context
    /* Active vertex program: 
     */
    const struct gl_vertex_program *vertex_program;
+   const struct gl_geometry_program *geometry_program;
    const struct gl_fragment_program *fragment_program;
 
    /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
@@ -1014,6 +1073,7 @@ struct brw_context
 
       bool constrained;
 
+      GLuint min_vs_entries;    /* Minimum number of VS entries */
       GLuint max_vs_entries;   /* Maximum number of VS entries */
       GLuint max_gs_entries;   /* Maximum number of GS entries */
 
@@ -1069,6 +1129,13 @@ struct brw_context
       GLuint last_bufsz;
    } curbe;
 
+   /**
+    * Layout of vertex data exiting the vertex shader.
+    *
+    * BRW_NEW_VUE_MAP_VS is flagged when this VUE map changes.
+    */
+   struct brw_vue_map vue_map_vs;
+
    /**
     * Layout of vertex data exiting the geometry portion of the pipleine.
     * This comes from the geometry shader if one exists, otherwise from the
@@ -1078,20 +1145,11 @@ struct brw_context
     */
    struct brw_vue_map vue_map_geom_out;
 
+   /**
+    * Data structures used by all vec4 program compiles (not specific to any
+    * particular program).
+    */
    struct {
-      struct brw_vs_prog_data *prog_data;
-
-      drm_intel_bo *scratch_bo;
-      drm_intel_bo *const_bo;
-      /** Offset in the program cache to the VS program */
-      uint32_t prog_offset;
-      uint32_t state_offset;
-
-      uint32_t push_const_offset; /* Offset in the batchbuffer */
-      int push_const_size; /* in 256-bit register increments */
-
-      /** @{ register allocator */
-
       struct ra_regs *regs;
 
       /**
@@ -1105,22 +1163,20 @@ struct brw_context
        * GRF for that object.
       */
       uint8_t *ra_reg_to_grf;
-      /** @} */
-
-      uint32_t bind_bo_offset;
-      uint32_t surf_offset[BRW_MAX_VS_SURFACES];
+   } vec4;
 
-      /** SAMPLER_STATE count and table offset */
-      uint32_t sampler_count;
-      uint32_t sampler_offset;
-
-      /** Offsets in the batch to sampler default colors (texture border color)
-       */
-      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
+   struct {
+      struct brw_stage_state base;
+      struct brw_vs_prog_data *prog_data;
    } vs;
 
    struct {
+      struct brw_stage_state base;
       struct brw_gs_prog_data *prog_data;
+   } gs;
+
+   struct {
+      struct brw_ff_gs_prog_data *prog_data;
 
       bool prog_active;
       /** Offset in the program cache to the CLIP program pre-gen6 */
@@ -1128,8 +1184,8 @@ struct brw_context
       uint32_t state_offset;
 
       uint32_t bind_bo_offset;
-      uint32_t surf_offset[BRW_MAX_GS_SURFACES];
-   } gs;
+      uint32_t surf_offset[BRW_MAX_GEN6_GS_SURFACES];
+   } ff_gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
@@ -1371,6 +1427,9 @@ void brw_upload_cs_urb_state(struct brw_context *brw);
  */
 void brw_fs_alloc_reg_sets(struct brw_context *brw);
 
+/* brw_vec4_reg_allocate.cpp */
+void brw_vec4_alloc_reg_set(struct brw_context *brw);
+
 /* brw_disasm.c */
 int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 
@@ -1446,11 +1505,14 @@ gen6_get_sample_position(struct gl_context *ctx,
 
 /* gen7_urb.c */
 void
-gen7_allocate_push_constants(struct brw_context *brw);
+gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
+                              unsigned gs_size, unsigned fs_size);
 
 void
-gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
-                    GLuint vs_size, GLuint vs_start);
+gen7_emit_urb_state(struct brw_context *brw,
+                    unsigned nr_vs_entries, unsigned vs_size,
+                    unsigned vs_start, unsigned nr_gs_entries,
+                    unsigned gs_size, unsigned gs_start);
 
 
 
@@ -1552,6 +1614,20 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
                             uint32_t width, uint32_t height,
                             uint32_t tile_x, uint32_t tile_y);
 
+extern const GLuint prim_to_hw_prim[GL_POLYGON+1];
+
+void
+brw_setup_vec4_key_clip_info(struct brw_context *brw,
+                             struct brw_vec4_prog_key *key,
+                             bool program_uses_clip_distance);
+
+void
+gen6_upload_vec4_push_constants(struct brw_context *brw,
+                                const struct gl_program *prog,
+                                const struct brw_vec4_prog_data *prog_data,
+                                struct brw_stage_state *stage_state,
+                                enum state_struct_type type);
+
 #ifdef __cplusplus
 }
 #endif