i965: Handle rasterizer discard in the clipper rather than GS on Gen6.
author Kenneth Graunke <kenneth@whitecape.org>
Fri, 17 May 2013 15:49:52 +0000 (08:49 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Mon, 20 May 2013 20:03:18 +0000 (13:03 -0700)
This has more of a negative impact than the previous patch, as on Gen6
passing primitives through to the clipper means we actually have to make
the GS thread write them to the URB.

I don't see another good solution though, and rasterizer discard is not
the most common of cases, so hopefully it won't be too terrible.

v2: Add a perf_debug; resolve rebase conflicts on the brw dirty flags;
    remove the rasterizer_discard field from brw_gs_prog_key.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Eric Anholt <eric@anholt.net> [v1]
Reviewed-by: Paul Berry <stereotype441@gmail.com>
src/mesa/drivers/dri/i965/brw_gs.c
src/mesa/drivers/dri/i965/brw_gs.h
src/mesa/drivers/dri/i965/brw_gs_emit.c
src/mesa/drivers/dri/i965/gen6_clip_state.c

index a432b76fa779499dc03e4469cebabcc8117325e5..f354dd9f62575ff65deb52e5a5e0ecf1f9b31d7c 100644 (file)
@@ -214,12 +214,6 @@ static void populate_key( struct brw_context *brw,
                swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
          }
       }
-      /* On Gen6, GS is also used for rasterizer discard. */
-      /* BRW_NEW_RASTERIZER_DISCARD */
-      if (ctx->RasterDiscard) {
-         key->need_gs_prog = true;
-         key->rasterizer_discard = true;
-      }
    } else {
       /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
        * into simpler primitives.
@@ -259,8 +253,7 @@ const struct brw_tracked_state brw_gs_prog = {
    .dirty = {
       .mesa  = (_NEW_LIGHT),
       .brw   = (BRW_NEW_PRIMITIVE |
-                BRW_NEW_TRANSFORM_FEEDBACK |
-                BRW_NEW_RASTERIZER_DISCARD),
+                BRW_NEW_TRANSFORM_FEEDBACK),
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = brw_upload_gs_prog
index f10d8e589ad07e4645f2542dda3f0bfeced33107..9a901d55cc013bb4009e823ace5ab7895c20526a 100644 (file)
@@ -49,7 +49,6 @@ struct brw_gs_prog_key {
 
    GLuint pv_first:1;
    GLuint need_gs_prog:1;
-   GLuint rasterizer_discard:1;
 
    /**
     * Number of varyings that are output to transform feedback.
index 87ff9f0713947115f33c1ab3713813d431b8b84a..cbfc6aab2e4afa76e256db05f957522b7698cef9 100644 (file)
@@ -200,28 +200,6 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c,
    }
 }
 
-/**
- * De-allocate the URB entry that was previously allocated to this thread
- * (without writing any vertex data to it), and terminate the thread.  This is
- * used to implement RASTERIZER_DISCARD functionality.
- */
-static void brw_gs_terminate(struct brw_gs_compile *c)
-{
-   struct brw_compile *p = &c->func;
-   brw_urb_WRITE(p,
-                 retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest */
-                 0, /* msg_reg_nr */
-                 c->reg.header, /* src0 */
-                 false, /* allocate */
-                 false, /* used */
-                 1, /* msg_length */
-                 0, /* response_length */
-                 true, /* eot */
-                 true, /* writes_complete */
-                 0, /* offset */
-                 BRW_URB_SWIZZLE_NONE);
-}
-
 /**
  * Send an FF_SYNC message to ensure that all previously spawned GS threads
  * have finished sending primitives down the pipeline, and to allocate a URB
@@ -484,14 +462,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
 
    brw_gs_ff_sync(c, 1);
 
-   /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so
-    * release the URB that was just allocated, and terminate the thread.
-    */
-   if (key->rasterizer_discard) {
-      brw_gs_terminate(c);
-      return;
-   }
-
    brw_gs_overwrite_header_dw2_from_r0(c);
    switch (num_verts) {
    case 1:
index cd2a8bfbfad3b8b9f9d6ee3831e522e39065b631..a05e42df73e26d41c5999cd679a971b2f28c6250 100644 (file)
@@ -76,6 +76,13 @@ upload_clip_state(struct brw_context *brw)
       dw2 |= GEN6_CLIP_GB_TEST;
    }
 
+   /* BRW_NEW_RASTERIZER_DISCARD */
+   if (ctx->RasterDiscard) {
+      dw2 |= GEN6_CLIP_MODE_REJECT_ALL;
+      perf_debug("Rasterizer discard is currently implemented via the clipper; "
+                 "having the GS not write primitives would likely be faster.");
+   }
+
    BEGIN_BATCH(4);
    OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
    OUT_BATCH(dw1);
@@ -93,7 +100,9 @@ upload_clip_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_clip_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS,
-      .brw   = BRW_NEW_CONTEXT | BRW_NEW_META_IN_PROGRESS,
+      .brw   = BRW_NEW_CONTEXT |
+               BRW_NEW_META_IN_PROGRESS |
+               BRW_NEW_RASTERIZER_DISCARD,
       .cache = CACHE_NEW_WM_PROG
    },
    .emit = upload_clip_state,