From d92f593d8776ec157ad0e7fa2ee8c9a17fd744ce Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 13 Feb 2014 21:37:50 -0800 Subject: [PATCH] i965/fs: Use conditional sends to do FB writes on HSW+. This drops the MOVs for header setup, which are totally mis-scheduled. total instructions in shared programs: 1590047 -> 1589331 (-0.05%) instructions in affected programs: 43729 -> 43013 (-1.64%) GAINED: 0 LOST: 0 glb27-trex: x before + after +-----------------------------------------------------------------------------+ | + x xx + + + | | ++ + xxx ++x xx + ** *x+ + + + x * | |+x xx x* x+++xx*x*xx+++*+*xx++** *x* x+***x*+xx+* + * + + *| | |__|__________MA___A___________|___| | +-----------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 49 62.33 65.41 63.49 63.53449 0.62757822 + 50 62.28 65.4 63.7 63.6982 0.656564 No difference proven at 95.0% confidence Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 -- .../drivers/dri/i965/brw_fs_generator.cpp | 22 +++++++++------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 14 +++++++++- .../drivers/dri/i965/gen8_fs_generator.cpp | 26 ++++++++++++++----- 4 files changed, 46 insertions(+), 18 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 8ab043fd63c..5360b56bcfe 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2241,8 +2241,6 @@ void brw_fb_WRITE(struct brw_compile *p, } else { insn = next_insn(p, BRW_OPCODE_SEND); } - /* The execution mask is ignored for render target writes. */ - insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; if (brw->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 00f19dcac71..ee13ced9e57 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -114,18 +114,22 @@ fs_generator::generate_fb_write(fs_inst *inst) brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - if ((fp && fp->UsesKill) || c->key.alpha_test_func) { - struct brw_reg pixel_mask; + if (inst->header_present) { + /* On HSW, the GPU will use the predicate on SENDC, unless the header is + * present. + */ + if (!brw->is_haswell && ((fp && fp->UsesKill) || + c->key.alpha_test_func)) { + struct brw_reg pixel_mask; - if (brw->gen >= 6) - pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); - else - pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + if (brw->gen >= 6) + pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + else + pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - brw_MOV(p, pixel_mask, brw_flag_reg(0, 1)); - } + brw_MOV(p, pixel_mask, brw_flag_reg(0, 1)); + } - if (inst->header_present) { if (brw->gen >= 6) { brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); brw_MOV(p, diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 45b053d540e..70b7c663a8c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -2743,7 +2743,7 @@ fs_visitor::emit_fb_writes() * thread message and on all dual-source messages." */ if (brw->gen >= 6 && - !this->fp->UsesKill && + (brw->is_haswell || brw->gen >= 8 || !this->fp->UsesKill) && !do_dual_src && c->key.nr_color_regions == 1) { header_present = false; @@ -2840,6 +2840,10 @@ fs_visitor::emit_fb_writes() inst->mlen = nr - base_mrf; inst->eot = true; inst->header_present = header_present; + if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) { + inst->predicate = BRW_PREDICATE_NORMAL; + inst->flag_subreg = 1; + } c->prog_data.dual_src_blend = true; this->current_annotation = NULL; @@ -2885,6 +2889,10 @@ fs_visitor::emit_fb_writes() inst->mlen = nr - base_mrf; inst->eot = eot; inst->header_present = header_present; + if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) { + inst->predicate = BRW_PREDICATE_NORMAL; + inst->flag_subreg = 1; + } } if (c->key.nr_color_regions == 0) { @@ -2902,6 +2910,10 @@ fs_visitor::emit_fb_writes() inst->mlen = nr - base_mrf; inst->eot = true; inst->header_present = header_present; + if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) { + inst->predicate = BRW_PREDICATE_NORMAL; + inst->flag_subreg = 1; + } } this->current_annotation = NULL; diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp index 332b14f6031..ea1632091e8 100644 --- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp @@ -61,14 +61,21 @@ gen8_fs_generator::mark_surface_used(unsigned surf_index) void gen8_fs_generator::generate_fb_write(fs_inst *ir) { - if (fp && fp->UsesKill) { - gen8_instruction *mov = - MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW), - brw_flag_reg(0, 1)); - gen8_set_mask_control(mov, BRW_MASK_DISABLE); - } + /* Disable the discard condition while setting up the header. */ + default_state.predicate = BRW_PREDICATE_NONE; + default_state.predicate_inverse = false; + default_state.flag_subreg_nr = 0; if (ir->header_present) { + /* The GPU will use the predicate on SENDC, unless the header is present. + */ + if (fp && fp->UsesKill) { + gen8_instruction *mov = + MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW), + brw_flag_reg(0, 1)); + gen8_set_mask_control(mov, BRW_MASK_DISABLE); + } + gen8_instruction *mov = MOV_RAW(brw_message_reg(ir->base_mrf), brw_vec8_grf(0, 0)); gen8_set_exec_size(mov, BRW_EXECUTE_16); @@ -88,6 +95,13 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir) } } + /* Set the predicate back to get the conditional write if necessary for + * discards. + */ + default_state.predicate = ir->predicate; + default_state.predicate_inverse = ir->predicate_inverse; + default_state.flag_subreg_nr = ir->flag_subreg; + gen8_instruction *inst = next_inst(BRW_OPCODE_SENDC); gen8_set_dst(brw, inst, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW)); gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf)); -- 2.30.2