From 8832920c298f4e13ffd5e53feeba509be69edb16 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 27 Mar 2011 01:18:41 -0700 Subject: [PATCH] i965: Initial Ivybridge URB space partitioning, including push constants. Currently this always reserves 16kB for push constants, regardless of how much space is needed, and partitions it evenly betwen the VS and FS. This is probably not ideal, but is straightforward. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/Makefile | 3 +- src/mesa/drivers/dri/i965/brw_context.c | 6 + src/mesa/drivers/dri/i965/brw_context.h | 2 + src/mesa/drivers/dri/i965/brw_defines.h | 13 +- src/mesa/drivers/dri/i965/brw_state.h | 1 + src/mesa/drivers/dri/i965/brw_state_upload.c | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 11 +- src/mesa/drivers/dri/i965/gen7_urb.c | 128 +++++++++++++++++++ 8 files changed, 162 insertions(+), 4 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen7_urb.c diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 849018b74ae..a1bbc962dee 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -96,7 +96,8 @@ DRIVER_SOURCES = \ gen6_urb.c \ gen6_viewport_state.c \ gen6_vs_state.c \ - gen6_wm_state.c + gen6_wm_state.c \ + gen7_urb.c C_SOURCES = \ $(COMMON_SOURCES) \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 3a7e33145ba..cd72bc5c242 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -182,9 +182,15 @@ GLboolean brwCreateContext( int api, if (IS_IVB_GT1(intel->intelScreen->deviceID)) { brw->wm_max_threads = 86; brw->vs_max_threads = 36; + brw->urb.size = 128; + brw->urb.max_vs_entries = 512; + brw->urb.max_gs_entries = 192; } else if (IS_IVB_GT2(intel->intelScreen->deviceID)) { brw->wm_max_threads = 86; brw->vs_max_threads = 128; + brw->urb.size = 256; + brw->urb.max_vs_entries = 704; + brw->urb.max_gs_entries = 320; } else { assert(!"Unknown gen7 device."); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 94108de7af9..b3d297deae6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -603,6 +603,8 @@ struct brw_context /* gen6: * The length of each URB entry owned by the VS (or GS), as * a number of 1024-bit (128-byte) rows. Should be >= 1. + * + * gen7: Same meaning, but in 512-bit (64-byte) rows. */ GLuint vs_size; GLuint gs_size; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fd5227d2c55..8135b3909f0 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -875,12 +875,23 @@ #define CMD_VF_STATISTICS_GM45 0x680b #define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ -#define _3DSTATE_URB 0x7805 /* GEN6+ */ +#define _3DSTATE_URB 0x7805 /* GEN6 */ # define GEN6_URB_VS_SIZE_SHIFT 16 # define GEN6_URB_VS_ENTRIES_SHIFT 0 # define GEN6_URB_GS_ENTRIES_SHIFT 8 # define GEN6_URB_GS_SIZE_SHIFT 0 +#define _3DSTATE_URB_VS 0x7830 /* GEN7+ */ +#define _3DSTATE_URB_HS 0x7831 /* GEN7+ */ +#define _3DSTATE_URB_DS 0x7832 /* GEN7+ */ +#define _3DSTATE_URB_GS 0x7833 /* GEN7+ */ +# define GEN7_URB_ENTRY_SIZE_SHIFT 16 +# define GEN7_URB_STARTING_ADDRESS_SHIFT 25 + +#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS 0x7912 /* GEN7+ */ +#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS 0x7916 /* GEN7+ */ +# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 + #define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ # define GEN6_CC_VIEWPORT_MODIFY (1 << 12) # define GEN6_SF_VIEWPORT_MODIFY (1 << 11) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 8b9e3a4ec5d..d67524ff264 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -111,6 +111,7 @@ extern const struct brw_tracked_state gen6_vs_constants; extern const struct brw_tracked_state gen6_vs_state; extern const struct brw_tracked_state gen6_wm_constants; extern const struct brw_tracked_state gen6_wm_state; +extern const struct brw_tracked_state gen7_urb; /*********************************************************************** * brw_state.c diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 7524b01bb75..9476c2c359b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -191,7 +191,7 @@ const struct brw_tracked_state *gen7_atoms[] = &brw_cc_vp, &gen6_viewport_state, /* must do after *_vp stages */ - &gen6_urb, + &gen7_urb, &gen6_blend_state, /* must do before cc unit */ &gen6_color_calc_state, /* must do before cc unit */ &gen6_depth_stencil_state, /* must do before cc unit */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7267deaa534..7d5eb353eee 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -432,7 +432,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* See emit_vertex_write() for where the VUE's overhead on top of the * attributes comes from. */ - if (intel->gen >= 6) { + if (intel->gen >= 7) { + int header_regs = 2; + if (c->key.nr_userclip) + header_regs += 2; + + /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the + * number of 64-byte (512-bit) units. + */ + c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 3) / 4; + } else if (intel->gen == 6) { int header_regs = 2; if (c->key.nr_userclip) header_regs += 2; diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c new file mode 100644 index 00000000000..3a614693dfc --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -0,0 +1,128 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "main/macros.h" +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/** + * The following diagram shows how we partition the URB: + * + * 8kB 8kB Rest of the URB space + * ____-____ ____-____ _________________-_________________ + * / \ / \ / \ + * +-------------------------------------------------------------+ + * | VS Push | FS Push | VS | + * | Constants | Constants | Handles | + * +-------------------------------------------------------------+ + * + * Notably, push constants must be stored at the beginning of the URB + * space, while entries can be stored anywhere. Ivybridge has a maximum + * constant buffer size of 16kB. + * + * Currently we split the constant buffer space evenly between VS and FS. + * This is probably not ideal, but simple. + * + * Ivybridge GT1 has 128kB of URB space. + * Ivybridge GT2 has 256kB of URB space. + * + * See "Volume 2a: 3D Pipeline," section 1.8. + */ +static void +prepare_urb(struct brw_context *brw) +{ + /* Total space for entries is URB size - 16kB for push constants */ + int handle_region_size = (brw->urb.size - 16) * 1024; /* bytes */ + + /* CACHE_NEW_VS_PROG */ + brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); + + int nr_vs_entries = handle_region_size / (brw->urb.vs_size * 64); + if (nr_vs_entries > brw->urb.max_vs_entries) + nr_vs_entries = brw->urb.max_vs_entries; + + /* According to volume 2a, nr_vs_entries must be a multiple of 8. */ + brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8); + + /* URB Starting Addresses are specified in multiples of 8kB. */ + brw->urb.vs_start = 2; /* skip over push constants */ +} + +static void +upload_urb(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + + assert(brw->urb.nr_vs_entries % 8 == 0); + assert(brw->urb.nr_gs_entries % 8 == 0); + /* GS requirement */ + assert(!brw->gs.prog_bo); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2)); + OUT_BATCH(8); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2)); + OUT_BATCH(8 | 8 << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2)); + OUT_BATCH(brw->urb.nr_vs_entries | + ((brw->urb.vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | + (brw->urb.vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); + + /* Allocate the GS, HS, and DS zero space - we don't use them. */ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2)); + OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) | + (2 << GEN7_URB_STARTING_ADDRESS_SHIFT)); + ADVANCE_BATCH(); +} + +const struct brw_tracked_state gen7_urb = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), + }, + .prepare = prepare_urb, + .emit = upload_urb, +}; -- 2.30.2