From: Chris Forbes Date: Fri, 7 Jun 2013 18:11:44 +0000 (+1200) Subject: i965: Shrink Gen5 VUE map layout to be the same as Gen4. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=def84d8014e334e00b0a76f7bb635ccc62e3b67e;p=mesa.git i965: Shrink Gen5 VUE map layout to be the same as Gen4. The PRM suggests a larger layout, mostly to support having gl_ClipDistance[] somewhere predictable for the fixed-function clipper -- but it didn't actually arrive in Gen5. Just use the same layout for both Gen4 and Gen5. No Piglit regressions. Improves performance in CS:S Video Stress Test by ~3%. V2: - Remove now-useless function for determining the SF URB read offset - Remove now-unused BRW_VARYING_SLOT_POS_DUPLICATE Signed-off-by: Chris Forbes Reviewed-by: Paul Berry Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index ae6f81ae6c4..0db1a1802b8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -333,7 +333,6 @@ struct brw_wm_prog_data { typedef enum { BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX, - BRW_VARYING_SLOT_POS_DUPLICATE, BRW_VARYING_SLOT_PAD, /** * Technically this is not a varying but just a placeholder that diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 5870b60d38e..ba8782b6d0c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -76,7 +76,7 @@ static void compile_sf_prog( struct brw_context *brw, c.vue_map.varying_to_slot[BRW_VARYING_SLOT_PNTC] = c.vue_map.num_slots; c.vue_map.slot_to_varying[c.vue_map.num_slots++] = BRW_VARYING_SLOT_PNTC; } - c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel); + c.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index f908fc0667b..caeb0d06b1c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -103,6 +103,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, bool allocate ); void brw_emit_point_setup( struct brw_sf_compile *c, bool allocate ); void brw_emit_point_sprite_setup( struct brw_sf_compile *c, bool allocate ); void brw_emit_anyprim_setup( struct brw_sf_compile *c ); -int brw_sf_compute_urb_entry_read_offset(struct intel_context *intel); + +#define BRW_SF_URB_ENTRY_READ_OFFSET 1 #endif diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 7c29ba27d1a..4b5e7cc93f1 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -124,19 +124,6 @@ const struct brw_tracked_state brw_sf_vp = { .emit = upload_sf_vp }; -/** - * Compute the offset within the URB (expressed in 256-bit register - * increments) that should be used to read the VUE in th efragment shader. - */ -int -brw_sf_compute_urb_entry_read_offset(struct intel_context *intel) -{ - if (intel->gen == 5) - return 3; - else - return 1; -} - static void upload_sf_unit( struct brw_context *brw ) { struct intel_context *intel = &brw->intel; @@ -163,9 +150,7 @@ static void upload_sf_unit( struct brw_context *brw ) sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; sf->thread3.dispatch_grf_start_reg = 3; - - sf->thread3.urb_entry_read_offset = - brw_sf_compute_urb_entry_read_offset(intel); + sf->thread3.urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; /* CACHE_NEW_SF_PROG */ sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 02ba603d018..162fd55f429 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2746,7 +2746,6 @@ vec4_visitor::emit_urb_slot(int mrf, int varying) current_annotation = "NDC"; emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC]))); break; - case BRW_VARYING_SLOT_POS_DUPLICATE: case VARYING_SLOT_POS: current_annotation = "gl_Position"; emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS]))); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 720325dec5c..d173d2e31b8 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -85,34 +85,17 @@ brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map, */ switch (intel->gen) { case 4: + case 5: /* There are 8 dwords in VUE header pre-Ironlake: * dword 0-3 is indices, point width, clip flags. * dword 4-7 is ndc position * dword 8-11 is the first vertex data. - */ - assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); - assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC); - assign_vue_slot(vue_map, VARYING_SLOT_POS); - break; - case 5: - /* There are 20 DWs (D0-D19) in VUE header on Ironlake: - * dword 0-3 of the header is indices, point width, clip flags. - * dword 4-7 is the ndc position - * dword 8-11 of the vertex header is the 4D space position - * dword 12-19 of the vertex header is the user clip distance. - * dword 20-23 is a pad so that the vertex element data is aligned - * dword 24-27 is the first vertex data we fill. * - * Note: future pipeline stages expect 4D space position to be - * contiguous with the other varyings, so we make dword 24-27 a - * duplicate copy of the 4D space position. + * On Ironlake the VUE header is nominally 20 dwords, but the hardware + * will accept the same header layout as Gen4 [and should be a bit faster] */ assign_vue_slot(vue_map, VARYING_SLOT_PSIZ); assign_vue_slot(vue_map, BRW_VARYING_SLOT_NDC); - assign_vue_slot(vue_map, BRW_VARYING_SLOT_POS_DUPLICATE); - assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0); - assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1); - assign_vue_slot(vue_map, BRW_VARYING_SLOT_PAD); assign_vue_slot(vue_map, VARYING_SLOT_POS); break; case 6: