From: Kenneth Graunke Date: Fri, 27 Feb 2015 06:55:54 +0000 (-0800) Subject: i965/fs: Make emit_shader_time_end() insert before EOT. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4ebeb71573ad44f7657810dc5dd2c9030e3e63db;p=mesa.git i965/fs: Make emit_shader_time_end() insert before EOT. Previously, we emitted the shader-time epilogue from emit_fb_writes(), during the middle of looping through color regions (or emit_urb_writes for the VS). This is duplicated several times and rather awkward. I need to fix a bug in our FB write handling, and it will be a lot easier if we move emit_shader_time_end() out of there. Now, we simply emit FB writes/URB writes, and subsequently have emit_shader_time_end() insert instructions before the final SEND with EOT. Not only is this simpler, it's actually a slight improvement: we now include the MOVs to set up the final FB write payload in our shader-time measurements. Note that INTEL_DEBUG=shader_time only exists on Gen7+, and uses send-from-GRF. (In the past, we might have hit trouble where both attempt to use MRFs for messages; that's not a problem now.) v2: Rebase on v3 of the previous patch and other shader_time fixes. Signed-off-by: Kenneth Graunke Reviewed-by: Topi Pohjolainen [v1] Acked-by: Matt Turner Cc: mesa-stable@lists.freedesktop.org --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index ee14a7a6483..89754ad9434 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -759,19 +759,24 @@ fs_visitor::emit_shader_time_end() unreachable("fs_visitor::emit_shader_time_end missing code"); } + /* Insert our code just before the final SEND with EOT. */ + exec_node *end = this->instructions.get_tail(); + assert(end && ((fs_inst *) end)->eot); + fs_inst *tm_read; fs_reg shader_end_time = get_timestamp(&tm_read); - emit(tm_read); + end->insert_before(tm_read); /* Check that there weren't any timestamp reset events (assuming these * were the only two timestamp reads that happened). */ fs_reg reset = shader_end_time; reset.set_smear(2); - fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u))); + fs_inst *test = AND(reg_null_d, reset, fs_reg(1u)); test->conditional_mod = BRW_CONDITIONAL_Z; test->force_writemask_all = true; - emit(IF(BRW_PREDICATE_NORMAL)); + end->insert_before(test); + end->insert_before(IF(BRW_PREDICATE_NORMAL)); fs_reg start = shader_start_time; start.negate = true; @@ -779,7 +784,7 @@ fs_visitor::emit_shader_time_end() diff.set_smear(0); fs_inst *add = ADD(diff, start, shader_end_time); add->force_writemask_all = true; - emit(add); + end->insert_before(add); /* If there were no instructions between the two timestamp gets, the diff * is 2 cycles. Remove that overhead, so I can forget about that when @@ -787,13 +792,13 @@ fs_visitor::emit_shader_time_end() */ add = ADD(diff, diff, fs_reg(-2u)); add->force_writemask_all = true; - emit(add); + end->insert_before(add); - emit(SHADER_TIME_ADD(type, diff)); - emit(SHADER_TIME_ADD(written_type, fs_reg(1u))); - emit(BRW_OPCODE_ELSE); - emit(SHADER_TIME_ADD(reset_type, fs_reg(1u))); - emit(BRW_OPCODE_ENDIF); + end->insert_before(SHADER_TIME_ADD(type, diff)); + end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u))); + end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width)); + end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u))); + end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width)); } fs_inst * @@ -3922,6 +3927,9 @@ fs_visitor::run_fs() emit_fb_writes(); + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); + calculate_cfg(); optimize(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cdaba7f47d9..522238759aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3670,9 +3670,6 @@ fs_visitor::emit_fb_writes() fs_inst *inst; if (do_dual_src) { - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - emit_shader_time_end(); - this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB dual-source write"); inst = emit_single_fb_write(this->outputs[0], this->dual_src_output, @@ -3712,19 +3709,12 @@ fs_visitor::emit_fb_writes() if (brw->gen >= 6 && key->replicate_alpha && target != 0) src0_alpha = offset(outputs[0], 3); - if (target == key->nr_color_regions - 1 && - (INTEL_DEBUG & DEBUG_SHADER_TIME)) - emit_shader_time_end(); - inst = emit_single_fb_write(this->outputs[target], reg_undef, src0_alpha, this->output_components[target]); inst->target = target; } } else { - if (INTEL_DEBUG & DEBUG_SHADER_TIME) - emit_shader_time_end(); - /* Even if there's no color buffers enabled, we still need to send * alpha out the pipeline to our null renderbuffer to support * alpha-testing, alpha-to-coverage, and so on. @@ -3935,9 +3925,6 @@ fs_visitor::emit_urb_writes() if (length == 8 || last) flush = true; if (flush) { - if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME)) - emit_shader_time_end(); - fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), BRW_REGISTER_TYPE_F);