i965/gs: Allocate push constant space for use by GS.
author: Paul Berry <stereotype441@gmail.com>
Wed, 27 Mar 2013 17:34:55 +0000 (10:34 -0700)
committer: Paul Berry <stereotype441@gmail.com>
Sun, 1 Sep 2013 00:11:49 +0000 (17:11 -0700)
Previously, we would always use the same push constant allocation
regardless of what shader programs were being run: the available push
constant space was split into 2 equal size partitions, one for the
vertex shader, and one for the fragment shader.

Now that we are adding geometry shader support, we need to do
something smarter.  This patch adjusts things so that when a geometry
shader is in use, we split the available push constant space into 3
nearly-equal size partitions instead of 2.

Since the push constant allocation is now affected by GL state, it can
no longer be set up by brw_upload_initial_gpu_state(); instead it must
be set up by a state atom.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/gen7_blorp.cpp
src/mesa/drivers/dri/i965/gen7_urb.c

index 0bfe606420b7564ca92eb618c2765ac3e145d92a..a14e2b55bd62b16e4d09c10f9878c6caa868f690 100644 (file)
@@ -1508,7 +1508,8 @@ gen6_get_sample_position(struct gl_context *ctx,
 
 /* gen7_urb.c */
 void
-gen7_allocate_push_constants(struct brw_context *brw);
+gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
+                              unsigned gs_size, unsigned fs_size);
 
 void
 gen7_emit_urb_state(struct brw_context *brw,
index 832ff55a0c002dd90bc7e6316642e3545b4b538f..8d9a824248a426733f69530284136865018997ac 100644 (file)
@@ -1284,6 +1284,7 @@ enum brw_message_target {
 # define GEN7_URB_STARTING_ADDRESS_SHIFT                25
 
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_GS         0x7915 /* GEN7+ */
 #define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
 
index 85f82fe81e1a2f45b76beb0bc7770f8930ef3c17..481463991ae7af2d4c9026eb530f309b16ebb5da 100644 (file)
@@ -112,6 +112,7 @@ extern const struct brw_tracked_state gen7_cc_viewport_state_pointer;
 extern const struct brw_tracked_state gen7_clip_state;
 extern const struct brw_tracked_state gen7_disable_stages;
 extern const struct brw_tracked_state gen7_ps_state;
+extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_sbe_state;
 extern const struct brw_tracked_state gen7_sf_clip_viewport;
 extern const struct brw_tracked_state gen7_sf_state;
@@ -220,9 +221,6 @@ uint32_t
 get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset,
                   int fs_attr, bool two_side_color, uint32_t *max_source_attr);
 
-/* gen7_urb.c */
-void gen7_allocate_push_constants(struct brw_context *brw);
-
 #ifdef __cplusplus
 }
 #endif
index b88300248272a037c523a5cfa540ebcefe83fdca..9638c69fba8423e832c35e21dc875aa51ba8c962 100644 (file)
@@ -188,6 +188,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
    &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
    &gen7_sf_clip_viewport,
 
+   &gen7_push_constant_space,
    &gen7_urb,
    &gen6_blend_state,          /* must do before cc unit */
    &gen6_color_calc_state,     /* must do before cc unit */
@@ -251,10 +252,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
       return;
 
    brw_upload_invariant_state(brw);
-
-   if (brw->gen >= 7) {
-      gen7_allocate_push_constants(brw);
-   }
 }
 
 void brw_init_state( struct brw_context *brw )
index 6c798b126319700d4bc87cacd7d0598a8cd83a34..9df3d929ffb5eb26a625d242a5c4b07f38441974 100644 (file)
@@ -51,6 +51,12 @@ static void
 gen7_blorp_emit_urb_config(struct brw_context *brw,
                            const brw_blorp_params *params)
 {
+   unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16;
+   gen7_emit_push_constant_state(brw,
+                                 urb_size / 2 /* vs_size */,
+                                 0 /* gs_size */,
+                                 urb_size / 2 /* fs_size */);
+
    /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword
     * 1.15:0 "VS Number of URB Entries".
     */
index 63467945e3a0397d5db9c24070587c73f787f97b..5a7ab473b6efaa42f2495c47c6800102558cfd83 100644 (file)
 /**
  * The following diagram shows how we partition the URB:
  *
- *      8kB         8kB              Rest of the URB space
- *   ____-____   ____-____   _________________-_________________
- *  /         \ /         \ /                                   \
+ *        16kB or 32kB               Rest of the URB space
+ *   __________-__________   _________________-_________________
+ *  /                     \ /                                   \
  * +-------------------------------------------------------------+
- * | VS Push   | FS Push   | VS                                  |
- * | Constants | Constants | Handles                             |
+ * |     VS/FS/GS Push     |              VS/GS URB              |
+ * |       Constants       |               Entries               |
  * +-------------------------------------------------------------+
  *
 * Notably, push constants must be stored at the beginning of the URB
 * space.  GT1/GT2 have a maximum constant buffer size of 16kB, while
 * Haswell GT3 doubles this (32kB).
  *
- * Currently we split the constant buffer space evenly between VS and FS.
- * This is probably not ideal, but simple.
+ * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
+ * sized) in increments of 1kB.  Haswell GT3 requires them to be located and
+ * sized in increments of 2kB.
+ *
+ * Currently we split the constant buffer space evenly among whatever stages
+ * are active.  This is probably not ideal, but simple.
  *
  * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
  * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
  * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
  * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
  */
-void
+static void
 gen7_allocate_push_constants(struct brw_context *brw)
 {
-   unsigned size = 8;
-   if (brw->is_haswell && brw->gt == 3)
-      size = 16;
+   unsigned avail_size = 16; /* space left to hand out; 16kB before scaling */
+   unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1; /* Haswell GT3 has twice the space, in 2kB steps */
+
+   /* BRW_NEW_GEOMETRY_PROGRAM */
+   bool gs_present = brw->geometry_program;
+
+   unsigned vs_size, gs_size;
+   if (gs_present) {
+      vs_size = avail_size / 3; /* 16 / 3 = 5 */
+      avail_size -= vs_size;
+      gs_size = avail_size / 2; /* 11 / 2 = 5 */
+      avail_size -= gs_size;
+   } else {
+      vs_size = avail_size / 2; /* 16 / 2 = 8 */
+      avail_size -= vs_size;
+      gs_size = 0;
+   }
+   unsigned fs_size = avail_size; /* FS takes the remainder: 6 with a GS, 8 without */
+
+   gen7_emit_push_constant_state(brw, multiplier * vs_size,
+                                 multiplier * gs_size, multiplier * fs_size);
+}
+
+void
+gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
+                              unsigned gs_size, unsigned fs_size)
+{
+   unsigned offset = 0; /* start of the next stage's region; VS, GS, FS are packed back to back */
 
-   BEGIN_BATCH(4);
+   BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
-   OUT_BATCH(size);
+   OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   offset += vs_size;
+
+   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2));
+   OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   offset += gs_size;
 
    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
-   OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); /* size in the low bits, offset in the shifted field, same as VS/GS */
    ADVANCE_BATCH();
 
    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
@@ -99,6 +133,15 @@ gen7_allocate_push_constants(struct brw_context *brw)
    }
 }
 
+const struct brw_tracked_state gen7_push_constant_space = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM, /* the emit function reads brw->geometry_program; BRW_NEW_CONTEXT presumably covers the initial upload -- confirm */
+      .cache = 0,
+   },
+   .emit = gen7_allocate_push_constants, /* splits the space among the stages and emits the ALLOC packets */
+};
+
 static void
 gen7_upload_urb(struct brw_context *brw)
 {