i965: Initial Ivybridge URB space partitioning, including push constants.
author: Kenneth Graunke <kenneth@whitecape.org>
Sun, 27 Mar 2011 08:18:41 +0000 (01:18 -0700)
committer: Kenneth Graunke <kenneth@whitecape.org>
Wed, 18 May 2011 06:32:58 +0000 (23:32 -0700)
Currently this always reserves 16kB for push constants, regardless of
how much space is needed, and partitions it evenly between the VS and FS.
This is probably not ideal, but is straightforward.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/Makefile
src/mesa/drivers/dri/i965/brw_context.c
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_state_upload.c
src/mesa/drivers/dri/i965/brw_vs_emit.c
src/mesa/drivers/dri/i965/gen7_urb.c [new file with mode: 0644]

index 849018b74ae503bab59a4103a6ca4b62c9d9db24..a1bbc962dee008144a2687d5db872e7de76aaf1f 100644 (file)
@@ -96,7 +96,8 @@ DRIVER_SOURCES = \
        gen6_urb.c \
        gen6_viewport_state.c \
        gen6_vs_state.c \
-       gen6_wm_state.c
+       gen6_wm_state.c \
+       gen7_urb.c
 
 C_SOURCES = \
        $(COMMON_SOURCES) \
index 3a7e33145ba99abdb0027f7c944ffe156920e164..cd72bc5c242dffd541280f8e8a08650fa4f68216 100644 (file)
@@ -182,9 +182,15 @@ GLboolean brwCreateContext( int api,
       if (IS_IVB_GT1(intel->intelScreen->deviceID)) {
         brw->wm_max_threads = 86;
         brw->vs_max_threads = 36;
+        brw->urb.size = 128;
+        brw->urb.max_vs_entries = 512;
+        brw->urb.max_gs_entries = 192;
       } else if (IS_IVB_GT2(intel->intelScreen->deviceID)) {
         brw->wm_max_threads = 86;
         brw->vs_max_threads = 128;
+        brw->urb.size = 256;
+        brw->urb.max_vs_entries = 704;
+        brw->urb.max_gs_entries = 320;
       } else {
         assert(!"Unknown gen7 device.");
       }
index 94108de7af95be3fcca92fcde0b045f9f02d1aa8..b3d297deae628387e424f82fcc2a12d403c1f4be 100644 (file)
@@ -603,6 +603,8 @@ struct brw_context
       /* gen6:
        * The length of each URB entry owned by the VS (or GS), as
        * a number of 1024-bit (128-byte) rows.  Should be >= 1.
+       *
+       * gen7: Same meaning, but in 512-bit (64-byte) rows.
        */
       GLuint vs_size;
       GLuint gs_size;
index fd5227d2c55c390f0037ef7ee200156911fe700c..8135b3909f006bf05c9a0f5a0aa47421a5811c9a 100644 (file)
 #define CMD_VF_STATISTICS_GM45        0x680b
 #define _3DSTATE_CC_STATE_POINTERS             0x780e /* GEN6+ */
 
-#define _3DSTATE_URB                           0x7805 /* GEN6+ */
+#define _3DSTATE_URB                           0x7805 /* GEN6 */
 # define GEN6_URB_VS_SIZE_SHIFT                                16
 # define GEN6_URB_VS_ENTRIES_SHIFT                     0
 # define GEN6_URB_GS_ENTRIES_SHIFT                     8
 # define GEN6_URB_GS_SIZE_SHIFT                                0
 
+#define _3DSTATE_URB_VS                         0x7830 /* GEN7+ */
+#define _3DSTATE_URB_HS                         0x7831 /* GEN7+ */
+#define _3DSTATE_URB_DS                         0x7832 /* GEN7+ */
+#define _3DSTATE_URB_GS                         0x7833 /* GEN7+ */
+# define GEN7_URB_ENTRY_SIZE_SHIFT                      16
+# define GEN7_URB_STARTING_ADDRESS_SHIFT                25
+
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
+# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
+
 #define _3DSTATE_VIEWPORT_STATE_POINTERS       0x780d /* GEN6+ */
 # define GEN6_CC_VIEWPORT_MODIFY                       (1 << 12)
 # define GEN6_SF_VIEWPORT_MODIFY                       (1 << 11)
index 8b9e3a4ec5d7799652cfe38a5fb08953ac5563b3..d67524ff264c5b280f396e5842d3349067508faf 100644 (file)
@@ -111,6 +111,7 @@ extern const struct brw_tracked_state gen6_vs_constants;
 extern const struct brw_tracked_state gen6_vs_state;
 extern const struct brw_tracked_state gen6_wm_constants;
 extern const struct brw_tracked_state gen6_wm_state;
+extern const struct brw_tracked_state gen7_urb;
 
 /***********************************************************************
  * brw_state.c
index 7524b01bb7596e35535628c223a1fa3558a7fbc4..9476c2c359ba381ca274d0c7248e0c9ef45962f3 100644 (file)
@@ -191,7 +191,7 @@ const struct brw_tracked_state *gen7_atoms[] =
    &brw_cc_vp,
    &gen6_viewport_state,       /* must do after *_vp stages */
 
-   &gen6_urb,
+   &gen7_urb,
    &gen6_blend_state,          /* must do before cc unit */
    &gen6_color_calc_state,     /* must do before cc unit */
    &gen6_depth_stencil_state,  /* must do before cc unit */
index 7267deaa534411a6f6d91730f942856147eeb1c2..7d5eb353eee938f70af89fe1bb9058ccd2298865 100644 (file)
@@ -432,7 +432,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    /* See emit_vertex_write() for where the VUE's overhead on top of the
     * attributes comes from.
     */
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+      int header_regs = 2;
+      if (c->key.nr_userclip)
+        header_regs += 2;
+
+      /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the
+       * number of 64-byte (512-bit) units.
+       */
+      c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 3) / 4;
+   } else if (intel->gen == 6) {
       int header_regs = 2;
       if (c->key.nr_userclip)
         header_regs += 2;
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
new file mode 100644 (file)
index 0000000..3a61469
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Compute the Ivybridge (gen7) URB layout for the vertex shader and store
+ * it in brw->urb (vs_size, nr_vs_entries and vs_start).  Nothing is emitted
+ * here; upload_urb() later writes these values into the batch.
+ *
+ * The following diagram shows how we partition the URB:
+ *
+ *      8kB         8kB              Rest of the URB space
+ *   ____-____   ____-____   _________________-_________________
+ *  /         \ /         \ /                                   \
+ * +-------------------------------------------------------------+
+ * | VS Push   | FS Push   | VS                                  |
+ * | Constants | Constants | Handles                             |
+ * +-------------------------------------------------------------+
+ *
+ * Notably, push constants must be stored at the beginning of the URB
+ * space, while entries can be stored anywhere.  Ivybridge has a maximum
+ * constant buffer size of 16kB.
+ *
+ * Currently we split the constant buffer space evenly between VS and FS.
+ * This is probably not ideal, but simple.
+ *
+ * Ivybridge GT1 has 128kB of URB space.
+ * Ivybridge GT2 has 256kB of URB space.
+ *
+ * Everything past the 16kB push constant area is handed to the VS: the
+ * entry count is the region size divided by the per-entry size, capped at
+ * brw->urb.max_vs_entries and then rounded down to a multiple of 8.
+ *
+ * NOTE(review): brw->urb.nr_gs_entries is not assigned in this function,
+ * yet upload_urb() asserts that it is a multiple of 8; presumably it is
+ * left at zero elsewhere -- confirm.
+ *
+ * See "Volume 2a: 3D Pipeline," section 1.8.
+ */
+static void
+prepare_urb(struct brw_context *brw)
+{
+   /* Total space for entries is URB size - 16kB for push constants.
+    * brw->urb.size is therefore expected to be in kB.
+    */
+   int handle_region_size = (brw->urb.size - 16) * 1024; /* bytes */
+
+   /* CACHE_NEW_VS_PROG: the entry size comes from the VS program data and
+    * is clamped so that it is never less than 1.
+    */
+   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+
+   int nr_vs_entries = handle_region_size / (brw->urb.vs_size * 64); /* vs_size * 64 = bytes per entry */
+   if (nr_vs_entries > brw->urb.max_vs_entries)
+      nr_vs_entries = brw->urb.max_vs_entries;
+
+   /* According to volume 2a, nr_vs_entries must be a multiple of 8. */
+   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8);
+
+   /* URB Starting Addresses are specified in multiples of 8kB. */
+   brw->urb.vs_start = 2; /* 2 * 8kB = 16kB: skip over push constants */
+}
+
+static void
+upload_urb(struct brw_context *brw)
+{ /* Emit the push constant allocations and the per-stage URB partitions
+   * into the batch, using the values stored in brw->urb.  The VS is the
+   * only stage that receives URB entries.
+   */
+   struct intel_context *intel = &brw->intel; /* not referenced directly below; presumably used by the batch macros -- confirm */
+
+   assert(brw->urb.nr_vs_entries % 8 == 0);
+   assert(brw->urb.nr_gs_entries % 8 == 0);
+   /* GS requirement.  The assert below checks that no GS program is
+    * present; the GS is given no URB entries further down.
+    */
+   assert(!brw->gs.prog_bo);
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
+   OUT_BATCH(8); /* value 8 with offset bits 0 -- presumably the 8kB VS block at the start of the URB; confirm units */
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
+   OUT_BATCH(8 | 8 << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); /* same value 8, at offset 8 -- presumably right after the VS block; confirm */
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
+   OUT_BATCH(brw->urb.nr_vs_entries | /* entry count occupies the low bits */
+             ((brw->urb.vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | /* entry size is written minus one */
+            (brw->urb.vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+
+   /* Allocate the GS, HS, and DS zero space - we don't use them.
+    * Each of the three packets below leaves the entry count bits at 0,
+    * writes 0 in the entry size field, and uses starting address 2, the
+    * same value prepare_urb() stores in brw->urb.vs_start.
+    */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
+   OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
+   OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
+   OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_urb = { /* tracked state for the gen7 URB and push constant layout */
+   .dirty = {
+      .mesa = 0, /* no Mesa state flags are listed for this state */
+      .brw = BRW_NEW_CONTEXT,
+      .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), /* prepare_urb reads the VS program data; upload_urb checks gs.prog_bo */
+   },
+   .prepare = prepare_urb, /* computes the values stored in brw->urb */
+   .emit = upload_urb, /* writes the URB and push constant packets to the batch */
+};