From 018ea68d8780ab5baeef0b8122b8410e5e55ae6d Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 27 Sep 2011 12:57:08 -0700 Subject: [PATCH] i965 Gen6+: De-compact clip planes. Previously, if the user enabled a non-consecutive set of clip planes (e.g. 0, 1, and 3), the driver would compact them down to a consecutive set starting at 0. This optimization was of dubious value, and complicated the implementation of gl_ClipDistance. This patch changes the driver so that with Gen6 and later chipsets, we no longer compact the clip planes. However, we still discard any clip planes beyond the highest number that is in use, so performance should not be affected for applications that use clip planes consecutively from 0. With chipsets previous to Gen6, we still compact the clip planes, since the pre-Gen6 clipper thread relies on this behavior. Reviewed-by: Ian Romanick Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_state.h | 5 --- .../drivers/dri/i965/brw_vec4_visitor.cpp | 36 ++++++++++++------- src/mesa/drivers/dri/i965/brw_vs.c | 17 ++++++--- src/mesa/drivers/dri/i965/brw_vs.h | 15 ++++---- src/mesa/drivers/dri/i965/brw_vs_emit.c | 14 ++++---- src/mesa/drivers/dri/i965/gen6_clip_state.c | 28 ++------------- src/mesa/drivers/dri/i965/gen7_clip_state.c | 9 ++--- 7 files changed, 56 insertions(+), 68 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index fabf0c0d26a..6fc95eb646e 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -213,9 +213,4 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset, unsigned int gen7_depth_format(struct brw_context *brw); -/* gen6_clip_state.c */ -uint32_t -brw_compute_userclip_flags(bool uses_clip_distance, - GLbitfield clip_planes_enabled); - #endif diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 680bd7d0520..b26700f7494 100644 --- 
a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -559,18 +559,29 @@ vec4_visitor::setup_uniform_clipplane_values() { gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); + /* Pre-Gen6, we compact clip planes. For example, if the user + * enables just clip planes 0, 1, and 3, we will enable clip planes + * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip + * plane 2. This simplifies the implementation of the pre-Gen6 clip + * thread. + * + * In Gen6 and later, we don't compact clip planes, because this + * simplifies the implementation of gl_ClipDistance. + */ int compacted_clipplane_index = 0; - for (int i = 0; i < MAX_CLIP_PLANES; ++i) { - if (c->key.userclip_planes_enabled & (1 << i)) { - this->uniform_vector_size[this->uniforms] = 4; - this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms); - this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F; - for (int j = 0; j < 4; ++j) { - c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j]; - } - ++compacted_clipplane_index; - ++this->uniforms; + for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) { + if (intel->gen < 6 && + !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) { + continue; + } + this->uniform_vector_size[this->uniforms] = 4; + this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms); + this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F; + for (int j = 0; j < 4; ++j) { + c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j]; } + ++compacted_clipplane_index; + ++this->uniforms; } } @@ -1807,7 +1818,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg) } current_annotation = "Clipping flags"; - for (i = 0; i < c->key.nr_userclip_planes; i++) { + for (i = 0; i < c->key.nr_userclip_plane_consts; i++) { vec4_instruction *inst; inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]), @@ -1883,7 +1894,8 @@ 
vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset) clip_vertex = VERT_RESULT_HPOS; } - for (int i = 0; i + offset < c->key.nr_userclip_planes && i < 4; ++i) { + for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4; + ++i) { emit(DP4(dst_reg(brw_writemask(reg, 1 << i)), src_reg(output_reg[clip_vertex]), src_reg(this->userplane[i + offset]))); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 14e91d87b2d..f671223b8c2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -276,7 +276,8 @@ do_vs_prog(struct brw_context *brw, static void brw_upload_vs_prog(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; @@ -290,10 +291,16 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.program_string_id = vp->id; key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); key.uses_clip_distance = vp->program.UsesClipDistance; - if (!key.uses_clip_distance) { - key.userclip_planes_enabled = ctx->Transform.ClipPlanesEnabled; - key.nr_userclip_planes - = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + if (key.userclip_active && !key.uses_clip_distance) { + if (intel->gen < 6) { + key.nr_userclip_plane_consts + = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + key.userclip_planes_enabled_gen_4_5 + = ctx->Transform.ClipPlanesEnabled; + } else { + key.nr_userclip_plane_consts + = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; + } } key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index b8d11dfa954..85a1d8247b9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ 
-53,10 +53,10 @@ struct brw_vs_prog_key { GLuint userclip_active:1; /** - * Number of user clip planes active. Zero if the shader uses - * gl_ClipDistance. + * How many user clipping planes are being uploaded to the vertex shader as + * push constants. */ - GLuint nr_userclip_planes:4; + GLuint nr_userclip_plane_consts:4; /** * True if the shader uses gl_ClipDistance, regardless of whether any clip @@ -65,10 +65,13 @@ struct brw_vs_prog_key { GLuint uses_clip_distance:1; /** - * Which user clip planes are active. Zero if the shader uses - * gl_ClipDistance. + * For pre-Gen6 hardware, a bitfield indicating which clipping planes are + * enabled. This is used to compact clip planes. + * + * For Gen6 and later hardware, clip planes are not compacted, so this + * value is zero to avoid provoking unnecessary shader recompiles. */ - GLuint userclip_planes_enabled:MAX_CLIP_PLANES; + GLuint userclip_planes_enabled_gen_4_5:MAX_CLIP_PLANES; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 88455807ba3..7326b3af2a2 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -204,17 +204,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ if (c->key.userclip_active) { if (intel->gen >= 6) { - for (i = 0; i < c->key.nr_userclip_planes; i++) { + for (i = 0; i < c->key.nr_userclip_plane_consts; i++) { c->userplane[i] = stride(brw_vec4_grf(reg + i / 2, (i % 2) * 4), 0, 4, 1); } - reg += ALIGN(c->key.nr_userclip_planes, 2) / 2; + reg += ALIGN(c->key.nr_userclip_plane_consts, 2) / 2; } else { - for (i = 0; i < c->key.nr_userclip_planes; i++) { + for (i = 0; i < c->key.nr_userclip_plane_consts; i++) { c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2, (i % 2) * 4), 0, 4, 1); } - reg += (ALIGN(6 + c->key.nr_userclip_planes, 4) / 4) * 2; + reg += (ALIGN(6 + c->key.nr_userclip_plane_consts, 4) / 4) * 2; } } @@ -239,7 
+239,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ if (intel->gen >= 6) { /* We can only load 32 regs of push constants. */ - max_constant = 32 * 2 - c->key.nr_userclip_planes; + max_constant = 32 * 2 - c->key.nr_userclip_plane_consts; } else { max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; } @@ -1565,7 +1565,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Set the user clip distances in dword 8-15. (m3-4)*/ if (c->key.userclip_active) { - for (i = 0; i < c->key.nr_userclip_planes; i++) { + for (i = 0; i < c->key.nr_userclip_plane_consts; i++) { struct brw_reg m; if (i < 4) m = brw_message_reg(3); @@ -1593,7 +1593,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) header1, brw_imm_ud(0x7ff<<8)); } - for (i = 0; i < c->key.nr_userclip_planes; i++) { + for (i = 0; i < c->key.nr_userclip_plane_consts; i++) { brw_set_conditionalmod(p, BRW_CONDITIONAL_L); brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<vertex_program; - if (!ctx->Transform.DepthClamp) depth_clamp = GEN6_CLIP_Z_TEST; @@ -79,8 +56,7 @@ upload_clip_state(struct brw_context *brw) } /* _NEW_TRANSFORM */ - userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance, - ctx->Transform.ClipPlanesEnabled); + userclip = ctx->Transform.ClipPlanesEnabled; BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); @@ -101,7 +77,7 @@ upload_clip_state(struct brw_context *brw) const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT, - .brw = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c index 5458ce81046..2f512f59346 100644 --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c @@ -39,10 +39,6 @@ upload_clip_state(struct 
brw_context *brw) /* _NEW_BUFFERS */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; - /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; - dw1 |= GEN7_CLIP_EARLY_CULL; /* _NEW_POLYGON */ @@ -86,8 +82,7 @@ upload_clip_state(struct brw_context *brw) } /* _NEW_TRANSFORM */ - userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance, - ctx->Transform.ClipPlanesEnabled); + userclip = ctx->Transform.ClipPlanesEnabled; BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); @@ -111,7 +106,7 @@ const struct brw_tracked_state gen7_clip_state = { _NEW_POLYGON | _NEW_LIGHT | _NEW_TRANSFORM), - .brw = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_clip_state, -- 2.30.2