i965: Use 3DSTATE_CLIP's User Clip Distance Enable bitmask on Gen8+.
author Kenneth Graunke <kenneth@whitecape.org>
Wed, 16 Nov 2016 06:59:45 +0000 (22:59 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Thu, 24 Nov 2016 00:57:29 +0000 (16:57 -0800)
Gen6-7.5 specify the user clip distance enable bitmask in 3DSTATE_CLIP.
Gen8+ normally uses the new internal signalling mechanism to select the
one specified in the last enabled shader stage (3DSTATE_VS, DS, or GS).

This is a pretty good fit for Vulkan, or even newer GL, where the
bitmask comes entirely from the shader.  But with glClipPlane(),
this is dynamic state, and we have to listen to _NEW_TRANSFORM.

Clip plane enables are the only reason the VS/DS/GS atoms need to
listen to _NEW_TRANSFORM.  3DSTATE_CLIP already has to listen to it
in order to support ARB_clip_control settings.

Setting the "Use the 3DSTATE_CLIP bitmask" force enable bit allows
us to drop _NEW_TRANSFORM from all the shader stage atoms, so we can
re-emit them less often.

Improves performance of OglBatch7 (version 6) by 2.70773% +/- 0.491257%
(n = 38) at 1024x768 on Cherryview.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/gen6_clip_state.c
src/mesa/drivers/dri/i965/gen8_ds_state.c
src/mesa/drivers/dri/i965/gen8_gs_state.c
src/mesa/drivers/dri/i965/gen8_vs_state.c

index c4e0f27a393a59235054a51a3dfce368865e7b8b..84a51b48cc67a0310a865267637fa4fa187e7b4c 100644 (file)
@@ -2217,6 +2217,7 @@ enum brw_message_target {
 # define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8          (0 << 19)
 # define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4          (1 << 19)
 # define GEN7_CLIP_EARLY_CULL                           (1 << 18)
+# define GEN8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK     (1 << 17)
 # define GEN7_CLIP_CULLMODE_BOTH                        (0 << 16)
 # define GEN7_CLIP_CULLMODE_NONE                        (1 << 16)
 # define GEN7_CLIP_CULLMODE_FRONT                       (2 << 16)
index 17fef761ce353d42b392884153a6f944d14afae6..0b3c7f16f189bb2ee6e6539eae08f428d143b48f 100644 (file)
@@ -153,6 +153,15 @@ upload_clip_state(struct brw_context *brw)
    /* _NEW_TRANSFORM */
    dw2 |= (ctx->Transform.ClipPlanesEnabled <<
            GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT);
+
+   /* Have the hardware use the user clip distance clip test enable bitmask
+    * specified here in 3DSTATE_CLIP rather than the one in 3DSTATE_VS/DS/GS.
+    * We already listen to _NEW_TRANSFORM here, but the other atoms don't
+    * need to other than this.
+    */
+   if (brw->gen >= 8)
+      dw1 |= GEN8_CLIP_FORCE_USER_CLIP_DISTANCE_BITMASK;
+
    if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
       dw2 |= GEN6_CLIP_API_D3D;
    else
index 0ea145673b4166f8c1b8005021f8d48425ff4486..ee2f82e109826d8814d57c0a207fe2b65bcd92bc 100644 (file)
@@ -30,7 +30,6 @@ static void
 gen8_upload_ds_state(struct brw_context *brw)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   struct gl_context *ctx = &brw->ctx;
    const struct brw_stage_state *stage_state = &brw->tes.base;
    /* BRW_NEW_TESS_PROGRAMS */
    bool active = brw->tess_eval_program;
@@ -72,10 +71,7 @@ gen8_upload_ds_state(struct brw_context *brw)
                  GEN7_DS_SIMD8_DISPATCH_ENABLE : 0) |
                 (tes_prog_data->domain == BRW_TESS_DOMAIN_TRI ?
                  GEN7_DS_COMPUTE_W_COORDINATE_ENABLE : 0));
-      /* _NEW_TRANSFORM */
-      OUT_BATCH(SET_FIELD(ctx->Transform.ClipPlanesEnabled,
-                          GEN8_DS_USER_CLIP_DISTANCE) |
-                SET_FIELD(vue_prog_data->cull_distance_mask,
+      OUT_BATCH(SET_FIELD(vue_prog_data->cull_distance_mask,
                           GEN8_DS_USER_CULL_DISTANCE));
 
 
@@ -110,7 +106,7 @@ gen8_upload_ds_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_ds_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = 0,
       .brw   = BRW_NEW_BATCH |
                BRW_NEW_BLORP |
                BRW_NEW_TESS_PROGRAMS |
index c39dc61261fa549872bb05017352b6eb563f3a32..2b74f1bd5751e7b7aea85e77cac9ac17998cbf73 100644 (file)
@@ -30,7 +30,6 @@ static void
 gen8_upload_gs_state(struct brw_context *brw)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   struct gl_context *ctx = &brw->ctx;
    const struct brw_stage_state *stage_state = &brw->gs.base;
    /* BRW_NEW_GEOMETRY_PROGRAM */
    bool active = brw->geometry_program;
@@ -112,10 +111,8 @@ gen8_upload_gs_state(struct brw_context *brw)
       /* DW8 */
       OUT_BATCH(dw8);
 
-      /* DW9 / _NEW_TRANSFORM */
-      OUT_BATCH((vue_prog_data->cull_distance_mask |
-                 ctx->Transform.ClipPlanesEnabled <<
-                 GEN8_GS_USER_CLIP_DISTANCE_SHIFT) |
+      /* DW9 */
+      OUT_BATCH(vue_prog_data->cull_distance_mask |
                 (urb_entry_output_length << GEN8_GS_URB_OUTPUT_LENGTH_SHIFT) |
                 (urb_entry_write_offset <<
                  GEN8_GS_URB_ENTRY_OUTPUT_OFFSET_SHIFT));
@@ -138,7 +135,7 @@ gen8_upload_gs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_gs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = 0,
       .brw   = BRW_NEW_BATCH |
                BRW_NEW_BLORP |
                BRW_NEW_CONTEXT |
index aed6be08a6609ced8c7d61ad6433be1cccfcc7da..7b66da4b17c78863ff9dd7f16a16cbbd75c6e5ce 100644 (file)
@@ -33,7 +33,6 @@ static void
 upload_vs_state(struct brw_context *brw)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   struct gl_context *ctx = &brw->ctx;
    const struct brw_stage_state *stage_state = &brw->vs.base;
    uint32_t floating_point_mode = 0;
 
@@ -81,16 +80,13 @@ upload_vs_state(struct brw_context *brw)
              simd8_enable |
              GEN6_VS_ENABLE);
 
-   /* _NEW_TRANSFORM */
-   OUT_BATCH(vue_prog_data->cull_distance_mask |
-             (ctx->Transform.ClipPlanesEnabled <<
-              GEN8_VS_USER_CLIP_DISTANCE_SHIFT));
+   OUT_BATCH(vue_prog_data->cull_distance_mask);
    ADVANCE_BATCH();
 }
 
 const struct brw_tracked_state gen8_vs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = 0,
       .brw   = BRW_NEW_BATCH |
                BRW_NEW_BLORP |
                BRW_NEW_CONTEXT |