From 9f3d3216cf25d8ffed4d72fbce6feacbc2990e4b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 13 Feb 2012 10:00:14 -0800 Subject: [PATCH] i965: Make the userclip flag for the VUE map come from VS prog data. This reduces recomputation of state based on non-clipping-related transform changes, and is a step toward removing VUE map recomputation. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_clip.c | 6 +++--- src/mesa/drivers/dri/i965/brw_context.h | 4 ++-- src/mesa/drivers/dri/i965/brw_gs.c | 10 ++++------ src/mesa/drivers/dri/i965/brw_sf.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 3 +-- src/mesa/drivers/dri/i965/brw_vs.c | 9 +++++++-- src/mesa/drivers/dri/i965/brw_vs_emit.c | 4 ++-- src/mesa/drivers/dri/i965/gen6_sf_state.c | 12 +++--------- src/mesa/drivers/dri/i965/gen7_sf_state.c | 10 +++------- src/mesa/drivers/dri/i965/gen7_sol_state.c | 8 +++----- 10 files changed, 29 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 50cc2462520..5b5f551948e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -69,7 +69,7 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - brw_compute_vue_map(&c.vue_map, intel, c.key.nr_userclip > 0, c.key.attrs); + brw_compute_vue_map(&c.vue_map, intel, brw->vs.prog_data); /* nr_regs is the number of registers filled by reading data from the VUE. * This program accesses the entire VUE, so nr_regs needs to be the size of @@ -145,12 +145,12 @@ brw_upload_clip_prog(struct brw_context *brw) */ /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->intel.reduced_primitive; - /* CACHE_NEW_VS_PROG */ + /* CACHE_NEW_VS_PROG (also part of VUE map) */ key.attrs = brw->vs.prog_data->outputs_written; /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); - /* _NEW_TRANSFORM */ + /* _NEW_TRANSFORM (also part of VUE map)*/ key.nr_userclip = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); if (intel->gen == 5) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 72e505940e9..c6860a76d50 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -400,6 +400,7 @@ struct brw_vs_prog_data { bool uses_new_param_layout; bool uses_vertexid; + bool userclip; }; @@ -1046,8 +1047,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen); /* brw_vs.c */ void brw_compute_vue_map(struct brw_vue_map *vue_map, const struct intel_context *intel, - bool userclip_active, - GLbitfield64 outputs_written); + const struct brw_vs_prog_data *prog_data); gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx); /* brw_wm.c */ diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index f9c4f6aa014..c6132df26d1 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -56,8 +56,7 @@ static void compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - /* The geometry shader needs to access the entire VUE. */ - brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs); + brw_compute_vue_map(&c.vue_map, intel, brw->vs.prog_data); c.nr_regs = (c.vue_map.num_slots + 1)/2; mem_ctx = NULL; @@ -166,7 +165,7 @@ static void populate_key( struct brw_context *brw, memset(key, 0, sizeof(*key)); - /* CACHE_NEW_VS_PROG */ + /* CACHE_NEW_VS_PROG (part of VUE map) */ key->attrs = brw->vs.prog_data->outputs_written; /* BRW_NEW_PRIMITIVE */ @@ -181,8 +180,8 @@ static void populate_key( struct brw_context *brw, key->pv_first = true; } - /* _NEW_TRANSFORM */ - key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); + /* CACHE_NEW_VS_PROG (part of VUE map)*/ + key->userclip_active = brw->vs.prog_data->userclip; if (intel->gen >= 7) { /* On Gen7 and later, we don't use GS (yet). */ @@ -267,7 +266,6 @@ brw_upload_gs_prog(struct brw_context *brw) const struct brw_tracked_state brw_gs_prog = { .dirty = { .mesa = (_NEW_LIGHT | - _NEW_TRANSFORM | _NEW_TRANSFORM_FEEDBACK | _NEW_RASTERIZER_DISCARD), .brw = BRW_NEW_PRIMITIVE, diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 54c27f98862..6c28d773c59 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -63,7 +63,7 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func, mem_ctx); c.key = *key; - brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs); + brw_compute_vue_map(&c.vue_map, intel, brw->vs.prog_data); c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel); c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset; c.nr_setup_regs = c.nr_attr_regs; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index edb8b2a10ed..5dfe1c1354c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -2252,8 +2252,7 @@ vec4_visitor::emit_urb_writes() /* FINISHME: edgeflag */ - brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active, - c->prog_data.outputs_written); + brw_compute_vue_map(&c->vue_map, intel, &c->prog_data); /* First mrf is the g0-based message header containing URB handles and such, * which is implied in VS_OPCODE_URB_WRITE. diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index a2bce27b098..be82177f40d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -52,13 +52,18 @@ static inline void assign_vue_slot(struct brw_vue_map *vue_map, /** * Compute the VUE map for vertex shader program. + * + * Note that consumers of this map using cache keys must include + * prog_data->userclip and prog_data->outputs_written in their key + * (generated by CACHE_NEW_VS_PROG). */ void brw_compute_vue_map(struct brw_vue_map *vue_map, const struct intel_context *intel, - bool userclip_active, - GLbitfield64 outputs_written) + const struct brw_vs_prog_data *prog_data) { + bool userclip_active = prog_data->userclip; + GLbitfield64 outputs_written = prog_data->outputs_written; int i; vue_map->num_slots = 0; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 2b4b13a10d9..07fc0af6a9d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -173,6 +173,7 @@ static inline bool can_use_direct_mrf(int vert_result, */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { + struct brw_context *brw = c->func.brw; struct intel_context *intel = &c->func.brw->intel; GLuint i, reg = 0, slot; int attributes_in_vue; @@ -325,8 +326,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Allocate outputs. The non-position outputs go straight into message regs. */ - brw_compute_vue_map(&c->vue_map, intel, c->key.userclip_active, - c->prog_data.outputs_written); + brw_compute_vue_map(&c->vue_map, intel, &c->prog_data); c->first_output = reg; first_reladdr_output = get_first_reladdr_output(&c->vp->program); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 07b8e6dd837..c4e7c4c6488 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -114,8 +114,6 @@ upload_sf_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vue_map vue_map; uint32_t urb_entry_read_length; - /* CACHE_NEW_VS_PROG */ - GLbitfield64 vs_outputs_written = brw->vs.prog_data->outputs_written; /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); /* _NEW_LIGHT */ @@ -128,13 +126,10 @@ upload_sf_state(struct brw_context *brw) int urb_entry_read_offset = 1; float point_size; uint16_t attr_overrides[FRAG_ATTRIB_MAX]; - bool userclip_active; uint32_t point_sprite_origin; - /* _NEW_TRANSFORM */ - userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); - - brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written); + /* CACHE_NEW_VS_PROG */ + brw_compute_vue_map(&vue_map, intel, brw->vs.prog_data); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; if (urb_entry_read_length == 0) { /* Setting the URB entry read length to 0 causes undefined behavior, so @@ -342,8 +337,7 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_LINE | _NEW_SCISSOR | _NEW_BUFFERS | - _NEW_POINT | - _NEW_TRANSFORM), + _NEW_POINT), .brw = (BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index b215af2cf4a..49460b2802d 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -35,8 +35,6 @@ upload_sbe_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; struct brw_vue_map vue_map; uint32_t urb_entry_read_length; - /* CACHE_NEW_VS_PROG */ - GLbitfield64 vs_outputs_written = brw->vs.prog_data->outputs_written; /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); /* _NEW_LIGHT */ @@ -44,15 +42,14 @@ upload_sbe_state(struct brw_context *brw) uint32_t dw1, dw10, dw11; int i; int attr = 0, input_index = 0; - /* _NEW_TRANSFORM */ int urb_entry_read_offset = 1; - bool userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); uint16_t attr_overrides[FRAG_ATTRIB_MAX]; /* _NEW_BUFFERS */ bool render_to_fbo = ctx->DrawBuffer->Name != 0; uint32_t point_sprite_origin; - brw_compute_vue_map(&vue_map, intel, userclip_active, vs_outputs_written); + /* CACHE_NEW_VS_PROG */ + brw_compute_vue_map(&vue_map, intel, brw->vs.prog_data); urb_entry_read_length = (vue_map.num_slots + 1)/2 - urb_entry_read_offset; if (urb_entry_read_length == 0) { /* Setting the URB entry read length to 0 causes undefined behavior, so @@ -146,8 +143,7 @@ const struct brw_tracked_state gen7_sbe_state = { .dirty = { .mesa = (_NEW_LIGHT | _NEW_POINT | - _NEW_PROGRAM | - _NEW_TRANSFORM), + _NEW_PROGRAM), .brw = (BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 110c1662b3d..134153e6703 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -241,9 +241,8 @@ upload_sol_state(struct brw_context *brw) bool active = xfb_obj->Active && !xfb_obj->Paused; struct brw_vue_map vue_map; - /* _NEW_TRANSFORM, CACHE_NEW_VS_PROG */ - brw_compute_vue_map(&vue_map, intel, ctx->Transform.ClipPlanesEnabled != 0, - brw->vs.prog_data->outputs_written); + /* CACHE_NEW_VS_PROG */ + brw_compute_vue_map(&vue_map, intel, brw->vs.prog_data); if (active) { upload_3dstate_so_buffers(brw); @@ -264,8 +263,7 @@ const struct brw_tracked_state gen7_sol_state = { .dirty = { .mesa = (_NEW_RASTERIZER_DISCARD | _NEW_LIGHT | - _NEW_TRANSFORM_FEEDBACK | - _NEW_TRANSFORM), + _NEW_TRANSFORM_FEEDBACK), .brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM), .cache = CACHE_NEW_VS_PROG, -- 2.30.2