i965 Gen6+: De-compact clip planes.
author Paul Berry <stereotype441@gmail.com>
Tue, 27 Sep 2011 19:57:08 +0000 (12:57 -0700)
committer Paul Berry <stereotype441@gmail.com>
Fri, 7 Oct 2011 02:29:14 +0000 (19:29 -0700)
Previously, if the user enabled a non-consecutive set of clip planes
(e.g. 0, 1, and 3), the driver would compact them down to a
consecutive set starting at 0.  This optimization was of dubious
value, and complicated the implementation of gl_ClipDistance.

This patch changes the driver so that with Gen6 and later chipsets, we
no longer compact the clip planes.  However, we still discard any clip
planes beyond the highest number that is in use, so performance should
not be affected for applications that use clip planes consecutively
from 0.

With chipsets previous to Gen6, we still compact the clip planes,
since the pre-Gen6 clipper thread relies on this behavior.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_state.h
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
src/mesa/drivers/dri/i965/brw_vs.c
src/mesa/drivers/dri/i965/brw_vs.h
src/mesa/drivers/dri/i965/brw_vs_emit.c
src/mesa/drivers/dri/i965/gen6_clip_state.c
src/mesa/drivers/dri/i965/gen7_clip_state.c

index fabf0c0d26af579b8350f922a0cf0c2b817d91b9..6fc95eb646e7e0a327f647af171c8b48169970b2 100644 (file)
@@ -213,9 +213,4 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
 unsigned int
 gen7_depth_format(struct brw_context *brw);
 
-/* gen6_clip_state.c */
-uint32_t
-brw_compute_userclip_flags(bool uses_clip_distance,
-                           GLbitfield clip_planes_enabled);
-
 #endif
index 680bd7d0520e49de7d99405477c8e4ff09a8b4e1..b26700f7494f2499579187f176343694327fea4f 100644 (file)
@@ -559,18 +559,29 @@ vec4_visitor::setup_uniform_clipplane_values()
 {
    gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
 
+   /* Pre-Gen6, we compact clip planes.  For example, if the user
+    * enables just clip planes 0, 1, and 3, we will enable clip planes
+    * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
+    * plane 2.  This simplifies the implementation of the pre-Gen6
+    * clip thread.
+    *
+    * In Gen6 and later, we don't compact clip planes, because this
+    * simplifies the implementation of gl_ClipDistance.
+    */
    int compacted_clipplane_index = 0;
-   for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
-      if (c->key.userclip_planes_enabled & (1 << i)) {
-         this->uniform_vector_size[this->uniforms] = 4;
-         this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
-         this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
-         for (int j = 0; j < 4; ++j) {
-            c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
-         }
-         ++compacted_clipplane_index;
-         ++this->uniforms;
+   for (int i = 0; i < c->key.nr_userclip_plane_consts; ++i) {
+      if (intel->gen < 6 &&
+          !(c->key.userclip_planes_enabled_gen_4_5 & (1 << i))) {
+         continue;
+      }
+      this->uniform_vector_size[this->uniforms] = 4;
+      this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
+      this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
+      for (int j = 0; j < 4; ++j) {
+         c->prog_data.param[this->uniforms * 4 + j] = &clip_planes[i][j];
       }
+      ++compacted_clipplane_index;
+      ++this->uniforms;
    }
 }
 
@@ -1807,7 +1818,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
       }
 
       current_annotation = "Clipping flags";
-      for (i = 0; i < c->key.nr_userclip_planes; i++) {
+      for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
         vec4_instruction *inst;
 
         inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
@@ -1883,7 +1894,8 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
       clip_vertex = VERT_RESULT_HPOS;
    }
 
-   for (int i = 0; i + offset < c->key.nr_userclip_planes && i < 4; ++i) {
+   for (int i = 0; i + offset < c->key.nr_userclip_plane_consts && i < 4;
+        ++i) {
       emit(DP4(dst_reg(brw_writemask(reg, 1 << i)),
                src_reg(output_reg[clip_vertex]),
                src_reg(this->userplane[i + offset])));
index 14e91d87b2dc63f11517f2a9796e78d7a4815825..f671223b8c2f06efd9daf7d64c01dd3d0221098b 100644 (file)
@@ -276,7 +276,8 @@ do_vs_prog(struct brw_context *brw,
 
 static void brw_upload_vs_prog(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
    struct brw_vs_prog_key key;
    struct brw_vertex_program *vp = 
       (struct brw_vertex_program *)brw->vertex_program;
@@ -290,10 +291,16 @@ static void brw_upload_vs_prog(struct brw_context *brw)
    key.program_string_id = vp->id;
    key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
    key.uses_clip_distance = vp->program.UsesClipDistance;
-   if (!key.uses_clip_distance) {
-      key.userclip_planes_enabled = ctx->Transform.ClipPlanesEnabled;
-      key.nr_userclip_planes
-         = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
+   if (key.userclip_active && !key.uses_clip_distance) {
+      if (intel->gen < 6) {
+         key.nr_userclip_plane_consts
+            = _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
+         key.userclip_planes_enabled_gen_4_5
+            = ctx->Transform.ClipPlanesEnabled;
+      } else {
+         key.nr_userclip_plane_consts
+            = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
+      }
    }
    key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
                        ctx->Polygon.BackMode != GL_FILL);
index b8d11dfa95478e6ef7f17d1d43406f0745406c02..85a1d8247b91b1e086696b3cd7002d9d79e047ac 100644 (file)
@@ -53,10 +53,10 @@ struct brw_vs_prog_key {
    GLuint userclip_active:1;
 
    /**
-    * Number of user clip planes active.  Zero if the shader uses
-    * gl_ClipDistance.
+    * How many user clipping planes are being uploaded to the vertex shader as
+    * push constants.
     */
-   GLuint nr_userclip_planes:4;
+   GLuint nr_userclip_plane_consts:4;
 
    /**
     * True if the shader uses gl_ClipDistance, regardless of whether any clip
@@ -65,10 +65,13 @@ struct brw_vs_prog_key {
    GLuint uses_clip_distance:1;
 
    /**
-    * Which user clip planes are active.  Zero if the shader uses
-    * gl_ClipDistance.
+    * For pre-Gen6 hardware, a bitfield indicating which clipping planes are
+    * enabled.  This is used to compact clip planes.
+    *
+    * For Gen6 and later hardware, clip planes are not compacted, so this
+    * value is zero to avoid provoking unnecessary shader recompiles.
     */
-   GLuint userclip_planes_enabled:MAX_CLIP_PLANES;
+   GLuint userclip_planes_enabled_gen_4_5:MAX_CLIP_PLANES;
 
    GLuint copy_edgeflag:1;
    GLuint point_coord_replace:8;
index 88455807ba300a2b9364aced5b2371ec5f84991e..7326b3af2a21d5258c94b000975fea6ad5ddf31d 100644 (file)
@@ -204,17 +204,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    if (c->key.userclip_active) {
       if (intel->gen >= 6) {
-        for (i = 0; i < c->key.nr_userclip_planes; i++) {
+        for (i = 0; i <= c->key.nr_userclip_plane_consts; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
-        reg += ALIGN(c->key.nr_userclip_planes, 2) / 2;
+        reg += ALIGN(c->key.nr_userclip_plane_consts, 2) / 2;
       } else {
-        for (i = 0; i < c->key.nr_userclip_planes; i++) {
+        for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
            c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
                                                  (i % 2) * 4), 0, 4, 1);
         }
-        reg += (ALIGN(6 + c->key.nr_userclip_planes, 4) / 4) * 2;
+        reg += (ALIGN(6 + c->key.nr_userclip_plane_consts, 4) / 4) * 2;
       }
 
    }
@@ -239,7 +239,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    if (intel->gen >= 6) {
       /* We can only load 32 regs of push constants. */
-      max_constant = 32 * 2 - c->key.nr_userclip_planes;
+      max_constant = 32 * 2 - c->key.nr_userclip_plane_consts;
    } else {
       max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
    }
@@ -1565,7 +1565,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
       /* Set the user clip distances in dword 8-15. (m3-4)*/
       if (c->key.userclip_active) {
-        for (i = 0; i < c->key.nr_userclip_planes; i++) {
+        for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
            struct brw_reg m;
            if (i < 4)
               m = brw_message_reg(3);
@@ -1593,7 +1593,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                 header1, brw_imm_ud(0x7ff<<8));
       }
 
-      for (i = 0; i < c->key.nr_userclip_planes; i++) {
+      for (i = 0; i < c->key.nr_userclip_plane_consts; i++) {
         brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
         brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
         brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
index 801b88fe047c37d79fe6a44d40c60b2dc09de35c..9b36af47dde7f9169cdd50ed6a767843549d075a 100644 (file)
 #include "brw_util.h"
 #include "intel_batchbuffer.h"
 
-uint32_t
-brw_compute_userclip_flags(bool uses_clip_distance,
-                           GLbitfield clip_planes_enabled)
-{
-   if (uses_clip_distance) {
-      /* When using gl_ClipDistance, it is up to the shader to decide which
-       * clip distance values to use.
-       */
-      return clip_planes_enabled;
-   } else {
-      /* When using clipping planes, we compact the ones that are in use so
-       * that they are always numbered consecutively from zero, so we need to
-       * enable clipping planes 0 through n-1 in the hardware regardless of
-       * which planes the user has selected.
-       */
-      return (1 << _mesa_bitcount_64(clip_planes_enabled)) - 1;
-   }
-}
-
 static void
 upload_clip_state(struct brw_context *brw)
 {
@@ -58,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
    uint32_t depth_clamp = 0;
    uint32_t provoking, userclip;
 
-   /* BRW_NEW_VERTEX_PROGRAM */
-   struct brw_vertex_program *vp =
-      (struct brw_vertex_program *)brw->vertex_program;
-
    if (!ctx->Transform.DepthClamp)
       depth_clamp = GEN6_CLIP_Z_TEST;
 
@@ -79,8 +56,7 @@ upload_clip_state(struct brw_context *brw)
    }
 
    /* _NEW_TRANSFORM */
-   userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
-                                         ctx->Transform.ClipPlanesEnabled);
+   userclip = ctx->Transform.ClipPlanesEnabled;
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
@@ -101,7 +77,7 @@ upload_clip_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_clip_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
-      .brw   = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
+      .brw   = BRW_NEW_CONTEXT,
       .cache = 0
    },
    .emit = upload_clip_state,
index 5458ce81046abbf28f168321ba5fcc2d1bfef3dd..2f512f59346d02651e6a1e4f9de4f0377fea2c47 100644 (file)
@@ -39,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
    /* _NEW_BUFFERS */
    GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
 
-   /* BRW_NEW_VERTEX_PROGRAM */
-   struct brw_vertex_program *vp =
-      (struct brw_vertex_program *)brw->vertex_program;
-
    dw1 |= GEN7_CLIP_EARLY_CULL;
 
    /* _NEW_POLYGON */
@@ -86,8 +82,7 @@ upload_clip_state(struct brw_context *brw)
    }
 
    /* _NEW_TRANSFORM */
-   userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
-                                         ctx->Transform.ClipPlanesEnabled);
+   userclip = ctx->Transform.ClipPlanesEnabled;
 
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
@@ -111,7 +106,7 @@ const struct brw_tracked_state gen7_clip_state = {
                 _NEW_POLYGON |
                 _NEW_LIGHT |
                 _NEW_TRANSFORM),
-      .brw   = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
+      .brw   = BRW_NEW_CONTEXT,
       .cache = 0
    },
    .emit = upload_clip_state,