From 037a901a5b201ed3c45595074f95d46b196fb511 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 17 May 2013 08:49:52 -0700 Subject: [PATCH] i965: Handle rasterizer discard in the clipper rather than GS on Gen6. This has more of a negative impact than the previous patch, as on Gen6 passing primitives through to the clipper means we actually have to make the GS thread write them to the URB. I don't see another good solution though, and rasterizer discard is not the most common of cases, so hopefully it won't be too terrible. v2: Add a perf_debug; resolve rebase conflicts on the brw dirty flags; remove the rasterizer_discard field from brw_gs_prog_key. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt [v1] Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_gs.c | 9 +------ src/mesa/drivers/dri/i965/brw_gs.h | 1 - src/mesa/drivers/dri/i965/brw_gs_emit.c | 30 --------------------- src/mesa/drivers/dri/i965/gen6_clip_state.c | 11 +++++++- 4 files changed, 11 insertions(+), 40 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index a432b76fa77..f354dd9f625 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -214,12 +214,6 @@ static void populate_key( struct brw_context *brw, swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; } } - /* On Gen6, GS is also used for rasterizer discard. */ - /* BRW_NEW_RASTERIZER_DISCARD */ - if (ctx->RasterDiscard) { - key->need_gs_prog = true; - key->rasterizer_discard = true; - } } else { /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP * into simpler primitives. @@ -259,8 +253,7 @@ const struct brw_tracked_state brw_gs_prog = { .dirty = { .mesa = (_NEW_LIGHT), .brw = (BRW_NEW_PRIMITIVE | - BRW_NEW_TRANSFORM_FEEDBACK | - BRW_NEW_RASTERIZER_DISCARD), + BRW_NEW_TRANSFORM_FEEDBACK), .cache = CACHE_NEW_VS_PROG }, .emit = brw_upload_gs_prog diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index f10d8e589ad..9a901d55cc0 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -49,7 +49,6 @@ struct brw_gs_prog_key { GLuint pv_first:1; GLuint need_gs_prog:1; - GLuint rasterizer_discard:1; /** * Number of varyings that are output to transform feedback. diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 87ff9f07139..cbfc6aab2e4 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -200,28 +200,6 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, } } -/** - * De-allocate the URB entry that was previously allocated to this thread - * (without writing any vertex data to it), and terminate the thread. This is - * used to implement RASTERIZER_DISCARD functionality. - */ -static void brw_gs_terminate(struct brw_gs_compile *c) -{ - struct brw_compile *p = &c->func; - brw_urb_WRITE(p, - retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest */ - 0, /* msg_reg_nr */ - c->reg.header, /* src0 */ - false, /* allocate */ - false, /* used */ - 1, /* msg_length */ - 0, /* response_length */ - true, /* eot */ - true, /* writes_complete */ - 0, /* offset */ - BRW_URB_SWIZZLE_NONE); -} - /** * Send an FF_SYNC message to ensure that all previously spawned GS threads * have finished sending primitives down the pipeline, and to allocate a URB @@ -484,14 +462,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, brw_gs_ff_sync(c, 1); - /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so - * release the URB that was just allocated, and terminate the thread. - */ - if (key->rasterizer_discard) { - brw_gs_terminate(c); - return; - } - brw_gs_overwrite_header_dw2_from_r0(c); switch (num_verts) { case 1: diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index cd2a8bfbfad..a05e42df73e 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -76,6 +76,13 @@ upload_clip_state(struct brw_context *brw) dw2 |= GEN6_CLIP_GB_TEST; } + /* BRW_NEW_RASTERIZER_DISCARD */ + if (ctx->RasterDiscard) { + dw2 |= GEN6_CLIP_MODE_REJECT_ALL; + perf_debug("Rasterizer discard is currently implemented via the clipper; " + "having the GS not write primitives would likely be faster."); + } + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(dw1); @@ -93,7 +100,9 @@ upload_clip_state(struct brw_context *brw) const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS, - .brw = BRW_NEW_CONTEXT | BRW_NEW_META_IN_PROGRESS, + .brw = BRW_NEW_CONTEXT | + BRW_NEW_META_IN_PROGRESS | + BRW_NEW_RASTERIZER_DISCARD, .cache = CACHE_NEW_WM_PROG }, .emit = upload_clip_state, -- 2.30.2