From 42a805700039e81a9245f46f153e2cd9705cd0d7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 12 Apr 2011 15:42:06 -0700 Subject: [PATCH] i965: Allocate the whole URB to the VS and fix calculations for Gen6. Since we never enable the GS on Sandybridge, there's no need to allocate it any URB space. Furthermore, the previous calculation was incorrect: it neglected to multiply by nr_vs_entries, instead comparing whether twice the size of a single VS URB entry was bigger than the entire URB space. It also neglected to take into account that vs_size is in units of 128 byte blocks, while urb_size is in bytes. Despite the above problems, the calculations resulted in an acceptable programming of the URB in most cases, at least on GT2. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_context.c | 5 +++- src/mesa/drivers/dri/i965/brw_context.h | 5 +++- src/mesa/drivers/dri/i965/gen6_urb.c | 34 ++++++++++++------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a74ba5c90b8..230d326fa12 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -182,7 +182,6 @@ GLboolean brwCreateContext( int api, /* WM maximum threads is number of EUs times number of threads per EU. */ if (intel->gen >= 6) { - brw->urb.size = 1024; if (IS_GT2(intel->intelScreen->deviceID)) { /* This could possibly be 80, but is supposed to require * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a @@ -190,9 +189,13 @@ GLboolean brwCreateContext( int api, */ brw->wm_max_threads = 40; brw->vs_max_threads = 60; + brw->urb.size = 64; /* volume 5c.5 section 5.1 */ + brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */ } else { brw->wm_max_threads = 40; brw->vs_max_threads = 24; + brw->urb.size = 32; /* volume 5c.5 section 5.1 */ + brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */ } } else if (intel->gen == 5) { brw->urb.size = 1024; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4b4dfbafb1a..1daa49abfb3 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -568,6 +568,9 @@ struct brw_context GLboolean constrained; + GLuint max_vs_handles; /* Maximum number of VS handles */ + GLuint max_gs_handles; /* Maximum number of GS handles */ + GLuint nr_vs_entries; GLuint nr_gs_entries; GLuint nr_clip_entries; @@ -579,7 +582,7 @@ struct brw_context * a number of 1024-bit (128-byte) rows. Should be >= 1. */ GLuint vs_size; -/* GLuint gs_size; */ + GLuint gs_size; GLuint vs_start; GLuint gs_start; diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index c3819f9b360..909e1bbe9ba 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -34,26 +34,25 @@ static void prepare_urb( struct brw_context *brw ) { - int urb_size, max_urb_entry; - struct intel_context *intel = &brw->intel; - - if (IS_GT1(intel->intelScreen->deviceID)) { - urb_size = 32 * 1024; - max_urb_entry = 128; - } else { - urb_size = 64 * 1024; - max_urb_entry = 256; - } - - brw->urb.nr_vs_entries = max_urb_entry; - brw->urb.nr_gs_entries = max_urb_entry; + int nr_vs_entries; /* CACHE_NEW_VS_PROG */ brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); - if (2 * brw->urb.vs_size > urb_size) - brw->urb.nr_vs_entries = brw->urb.nr_gs_entries = - (urb_size ) / (2 * brw->urb.vs_size); + /* Calculate how many VS URB entries fit in the total URB size */ + nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128); + + if (nr_vs_entries > brw->urb.max_vs_handles) + nr_vs_entries = brw->urb.max_vs_handles; + + /* According to volume 2a, nr_vs_entries must be a multiple of 4. */ + brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4); + + /* Since we currently don't support Geometry Shaders, we always put the + * GS unit in passthrough mode and don't allocate it any URB space. + */ + brw->urb.nr_gs_entries = 0; + brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */ } static void @@ -61,6 +60,7 @@ upload_urb(struct brw_context *brw) { struct intel_context *intel = &brw->intel; + assert(brw->urb.nr_vs_entries >= 24); assert(brw->urb.nr_vs_entries % 4 == 0); assert(brw->urb.nr_gs_entries % 4 == 0); /* GS requirement */ @@ -70,7 +70,7 @@ upload_urb(struct brw_context *brw) OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); - OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | + OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT)); ADVANCE_BATCH(); } -- 2.30.2