From e7da40afe84349a640fe15e3af408a0dfe880e85 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Fri, 23 Sep 2011 21:33:50 -0700 Subject: [PATCH] i965 new VS: don't share clip plane constants in pre-GEN6 In pre-GEN6, when using clip planes, both the vertex shader and the clipper need access to the client-supplied clip planes, since the vertex shader needs them to set the clip flags, and the clipper needs them to determine where to insert new vertices. With the old VS backend, we used a clever optimization to avoid placing duplicate copies of these planes in the CURBE: we used the same block of memory for both the clipper and vertex shader constants, with the clip planes at the front of it, and then we instructed the clipper to read just the initial part of this block containing the clip planes. This optimization was tricky, of dubious value, and not completely working in the new VS backend, so I've removed it. Now, when using the new VS backend, separate parts of the CURBE are used for the clipper and the vertex shader. Note that this doesn't affect the number of push constants available to the vertex shader, it simply causes the CURBE to occupy a few more bytes of URB memory. The old VS backend is unaffected. GEN6+, which does clipping entirely in hardware, is also unaffected. 
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_curbe.c | 9 ++++- src/mesa/drivers/dri/i965/brw_vec4.h | 2 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 21 ++--------- .../drivers/dri/i965/brw_vec4_visitor.cpp | 22 ++++++++++- src/mesa/drivers/dri/i965/brw_vs_state.c | 7 ++-- src/mesa/drivers/dri/i965/gen6_vs_state.c | 37 ++++++++++--------- 6 files changed, 55 insertions(+), 43 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 2ee2b464ed7..e1676de1e02 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -208,8 +208,13 @@ static void prepare_constant_buffer(struct brw_context *brw) } - /* The clipplanes are actually delivered to both CLIP and VS units. - * VS uses them to calculate the outcode bitmasks. + /* When using the old VS backend, the clipplanes are actually delivered to + * both CLIP and VS units. VS uses them to calculate the outcode bitmasks. + * + * When using the new VS backend, it is responsible for setting up its own + * clipplane constants if it needs them. This results in a slight waste of + * curbe space, but the advantage is that the new VS backend can use its + * general-purpose uniform layout code to store the clipplanes. 
*/ if (brw->curbe.clip_size) { GLuint offset = brw->curbe.clip_start * 16; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 876a6917201..b6864c326e9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -324,6 +324,7 @@ public: int first_non_payload_grf; int *virtual_grf_def; int *virtual_grf_use; + dst_reg userplane[MAX_CLIP_PLANES]; /** * This is the size to be used for an array with an element per @@ -385,6 +386,7 @@ public: void fail(const char *msg, ...); int virtual_grf_alloc(int size); + void setup_uniform_clipplane_values(); int setup_uniform_values(int loc, const glsl_type *type); void setup_builtin_uniform_values(ir_variable *ir); int setup_attributes(int payload_reg); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index b58ebc69b4c..1eb8f3f78a3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -91,24 +91,6 @@ vec4_visitor::setup_attributes(int payload_reg) int vec4_visitor::setup_uniforms(int reg) { - /* User clip planes from curbe: - */ - if (c->key.nr_userclip && !c->key.uses_clip_distance) { - if (intel->gen >= 6) { - for (int i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, - (i % 2) * 4), 0, 4, 1); - } - reg += ALIGN(c->key.nr_userclip, 2) / 2; - } else { - for (int i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, - (i % 2) * 4), 0, 4, 1); - } - reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2; - } - } - /* The pre-gen6 VS requires that some push constants get loaded no * matter what, or the GPU would hang. */ @@ -598,6 +580,9 @@ vec4_visitor::generate_vs_instruction(vec4_instruction *instruction, bool vec4_visitor::run() { + if (c->key.nr_userclip && !c->key.uses_clip_distance) + setup_uniform_clipplane_values(); + /* Generate VS IR for main(). 
(the visitor only descends into * functions called "main"). */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index a32451fa2fe..ee3b2a8ba5c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -543,6 +543,24 @@ vec4_visitor::setup_uniform_values(int loc, const glsl_type *type) } } +void +vec4_visitor::setup_uniform_clipplane_values() +{ + int compacted_clipplane_index = 0; + for (int i = 0; i < MAX_CLIP_PLANES; ++i) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + this->uniform_vector_size[this->uniforms] = 4; + this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms); + this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F; + for (int j = 0; j < 4; ++j) { + c->prog_data.param[this->uniforms * 4 + j] = &ctx->Transform._ClipUserPlane[i][j]; + } + ++compacted_clipplane_index; + ++this->uniforms; + } + } +} + /* Our support for builtin uniforms is even scarier than non-builtin. * It sits on top of the PROG_STATE_VAR parameters that are * automatically updated from GL context state. 
@@ -1767,7 +1785,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) vec4_instruction *inst; inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]), - src_reg(c->userplane[i]))); + src_reg(this->userplane[i]))); inst->conditional_mod = BRW_CONDITIONAL_L; emit(OR(header1, src_reg(header1), 1u << i)); @@ -1825,7 +1843,7 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset) for (int i = 0; i + offset < c->key.nr_userclip && i < 4; ++i) { emit(DP4(dst_reg(brw_writemask(reg, 1 << i)), src_reg(output_reg[VERT_RESULT_HPOS]), - src_reg(c->userplane[i + offset]))); + src_reg(this->userplane[i + offset]))); } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index a01b614ffff..8d86c124216 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -92,8 +92,8 @@ brw_prepare_vs_unit(struct brw_context *brw) vs->thread3.dispatch_grf_start_reg = 1; vs->thread3.urb_entry_read_offset = 0; - /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */ + if (ctx->Transform.ClipPlanesEnabled && !brw->vs.prog_data->uses_new_param_layout) { /* Note that we read in the userclip planes as well, hence * clip_start: */ @@ -177,7 +177,8 @@ const struct brw_tracked_state brw_vs_unit = { BRW_NEW_PROGRAM_CACHE | BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | - BRW_NEW_URB_FENCE), + BRW_NEW_URB_FENCE | + BRW_NEW_VERTEX_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .prepare = brw_prepare_vs_unit, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index afb4acec326..0f6f6a7e062 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -66,24 +66,6 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) 4 * sizeof(float), 32, &brw->vs.push_const_offset); - /* This should be loaded 
like any other param, but it's ad-hoc - * until we redo the VS backend. - */ - if (!uses_clip_distance) { - for (i = 0; i < MAX_CLIP_PLANES; i++) { - if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { - memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float)); - param += 4; - params_uploaded++; - } - } - } - /* Align to a reg for convenience for brw_vs_emit.c */ - if (params_uploaded & 1) { - param += 4; - params_uploaded++; - } - if (brw->vs.prog_data->uses_new_param_layout) { for (i = 0; i < brw->vs.prog_data->nr_params; i++) { *param = *brw->vs.prog_data->param[i]; @@ -91,6 +73,25 @@ gen6_prepare_vs_push_constants(struct brw_context *brw) } params_uploaded += brw->vs.prog_data->nr_params / 4; } else { + /* This should be loaded like any other param, but it's ad-hoc + * until we redo the VS backend. + */ + if (!uses_clip_distance) { + for (i = 0; i < MAX_CLIP_PLANES; i++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << i)) { + memcpy(param, ctx->Transform._ClipUserPlane[i], 4 * sizeof(float)); + param += 4; + params_uploaded++; + } + } + } + + /* Align to a reg for convenience for brw_vs_emit.c */ + if (params_uploaded & 1) { + param += 4; + params_uploaded++; + } + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { if (brw->vs.constant_map[i] != -1) { memcpy(param + brw->vs.constant_map[i] * 4, -- 2.30.2