From fb5bf03a2092159166229eacf57c71587f762c57 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 21 Sep 2012 13:11:54 +0200 Subject: [PATCH] i965/fs: Move constant propagation to the same codebase as copy prop. This means that we don't get constant prop across into the first block after a BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it across control flow at some point. More importantly, with the next commit it will help avoid O(n^2) with instruction count runtime for shaders that have many constant moves. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 163 ------------------ src/mesa/drivers/dri/i965/brw_fs.h | 2 +- .../dri/i965/brw_fs_copy_propagation.cpp | 124 ++++++++++++- 3 files changed, 124 insertions(+), 165 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 27014133d4f..0545a74ec3d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants() c->prog_data.nr_pull_params = pull_uniform_count; } -/** - * Attempts to move immediate constants into the immediate - * constant slot of following instructions. - * - * Immediate constants are a bit tricky -- they have to be in the last - * operand slot, you can't do abs/negate on them, - */ - -bool -fs_visitor::propagate_constants() -{ - bool progress = false; - - calculate_live_intervals(); - - foreach_list(node, &this->instructions) { - fs_inst *inst = (fs_inst *)node; - - if (inst->opcode != BRW_OPCODE_MOV || - inst->predicated || - inst->dst.file != GRF || inst->src[0].file != IMM || - inst->dst.type != inst->src[0].type || - (c->dispatch_width == 16 && - (inst->force_uncompressed || inst->force_sechalf))) - continue; - - /* Don't bother with cases where we should have had the - * operation on the constant folded in GLSL already. - */ - if (inst->saturate) - continue; - - /* Found a move of a constant to a GRF. Find anything else using the GRF - * before it's written, and replace it with the constant if we can. - */ - for (fs_inst *scan_inst = (fs_inst *)inst->next; - !scan_inst->is_tail_sentinel(); - scan_inst = (fs_inst *)scan_inst->next) { - if (scan_inst->opcode == BRW_OPCODE_DO || - scan_inst->opcode == BRW_OPCODE_WHILE || - scan_inst->opcode == BRW_OPCODE_ELSE || - scan_inst->opcode == BRW_OPCODE_ENDIF) { - break; - } - - for (int i = 2; i >= 0; i--) { - if (scan_inst->src[i].file != GRF || - scan_inst->src[i].reg != inst->dst.reg || - scan_inst->src[i].reg_offset != inst->dst.reg_offset) - continue; - - /* Don't bother with cases where we should have had the - * operation on the constant folded in GLSL already. - */ - if (scan_inst->src[i].negate || scan_inst->src[i].abs) - continue; - - switch (scan_inst->opcode) { - case BRW_OPCODE_MOV: - scan_inst->src[i] = inst->src[0]; - progress = true; - break; - - case BRW_OPCODE_MUL: - case BRW_OPCODE_ADD: - if (i == 1) { - scan_inst->src[i] = inst->src[0]; - progress = true; - } else if (i == 0 && scan_inst->src[1].file != IMM) { - /* Fit this constant in by commuting the operands. - * Exception: we can't do this for 32-bit integer MUL - * because it's asymmetric. - */ - if (scan_inst->opcode == BRW_OPCODE_MUL && - (scan_inst->src[1].type == BRW_REGISTER_TYPE_D || - scan_inst->src[1].type == BRW_REGISTER_TYPE_UD)) - break; - scan_inst->src[0] = scan_inst->src[1]; - scan_inst->src[1] = inst->src[0]; - progress = true; - } - break; - - case BRW_OPCODE_CMP: - case BRW_OPCODE_IF: - if (i == 1) { - scan_inst->src[i] = inst->src[0]; - progress = true; - } else if (i == 0 && scan_inst->src[1].file != IMM) { - uint32_t new_cmod; - - new_cmod = brw_swap_cmod(scan_inst->conditional_mod); - if (new_cmod != ~0u) { - /* Fit this constant in by swapping the operands and - * flipping the test - */ - scan_inst->src[0] = scan_inst->src[1]; - scan_inst->src[1] = inst->src[0]; - scan_inst->conditional_mod = new_cmod; - progress = true; - } - } - break; - - case BRW_OPCODE_SEL: - if (i == 1) { - scan_inst->src[i] = inst->src[0]; - progress = true; - } else if (i == 0 && scan_inst->src[1].file != IMM) { - scan_inst->src[0] = scan_inst->src[1]; - scan_inst->src[1] = inst->src[0]; - - /* If this was predicated, flipping operands means - * we also need to flip the predicate. - */ - if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) { - scan_inst->predicate_inverse = - !scan_inst->predicate_inverse; - } - progress = true; - } - break; - - case SHADER_OPCODE_RCP: - /* The hardware doesn't do math on immediate values - * (because why are you doing that, seriously?), but - * the correct answer is to just constant fold it - * anyway. - */ - assert(i == 0); - if (inst->src[0].imm.f != 0.0f) { - scan_inst->opcode = BRW_OPCODE_MOV; - scan_inst->src[0] = inst->src[0]; - scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f; - progress = true; - } - break; - - case FS_OPCODE_PULL_CONSTANT_LOAD: - scan_inst->src[i] = inst->src[0]; - progress = true; - break; - - default: - break; - } - } - - if (scan_inst->dst.file == GRF && - scan_inst->overwrites_reg(inst->dst)) { - break; - } - } - } - - if (progress) - this->live_intervals_valid = false; - - return progress; -} - - bool fs_visitor::opt_algebraic() { @@ -2025,7 +1863,6 @@ fs_visitor::run() progress = remove_duplicate_mrf_writes() || progress; - progress = propagate_constants() || progress; progress = opt_algebraic() || progress; progress = opt_cse() || progress; progress = opt_copy_propagate() || progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4db9e90ed5e..269dd0aceb9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -240,12 +240,12 @@ public: void split_virtual_grfs(); void setup_pull_constants(); void calculate_live_intervals(); - bool propagate_constants(); bool opt_algebraic(); bool opt_cse(); bool opt_cse_local(fs_bblock *block, exec_list *aeb); bool opt_copy_propagate(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); + bool try_constant_propagate(fs_inst *inst, acp_entry *entry); bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block, exec_list *acp); bool register_coalesce(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 1870f43feba..6eff80285d7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -34,6 +34,9 @@ struct acp_entry : public exec_node { bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { + if (entry->src.file == IMM) + return false; + if (inst->src[arg].file != entry->dst.file || inst->src[arg].reg != entry->dst.reg || inst->src[arg].reg_offset != entry->dst.reg_offset) { @@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) return true; } + +bool +fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) +{ + bool progress = false; + + if (entry->src.file != IMM) + return false; + + for (int i = 2; i >= 0; i--) { + if (inst->src[i].file != entry->dst.file || + inst->src[i].reg != entry->dst.reg || + inst->src[i].reg_offset != entry->dst.reg_offset) + continue; + + /* Don't bother with cases that should have been taken care of by the + * GLSL compiler's constant folding pass. + */ + if (inst->src[i].negate || inst->src[i].abs) + continue; + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + inst->src[i] = entry->src; + progress = true; + break; + + case BRW_OPCODE_MUL: + case BRW_OPCODE_ADD: + if (i == 1) { + inst->src[i] = entry->src; + progress = true; + } else if (i == 0 && inst->src[1].file != IMM) { + /* Fit this constant in by commuting the operands. + * Exception: we can't do this for 32-bit integer MUL + * because it's asymmetric. + */ + if (inst->opcode == BRW_OPCODE_MUL && + (inst->src[1].type == BRW_REGISTER_TYPE_D || + inst->src[1].type == BRW_REGISTER_TYPE_UD)) + break; + inst->src[0] = inst->src[1]; + inst->src[1] = entry->src; + progress = true; + } + break; + + case BRW_OPCODE_CMP: + case BRW_OPCODE_IF: + if (i == 1) { + inst->src[i] = entry->src; + progress = true; + } else if (i == 0 && inst->src[1].file != IMM) { + uint32_t new_cmod; + + new_cmod = brw_swap_cmod(inst->conditional_mod); + if (new_cmod != ~0u) { + /* Fit this constant in by swapping the operands and + * flipping the test + */ + inst->src[0] = inst->src[1]; + inst->src[1] = entry->src; + inst->conditional_mod = new_cmod; + progress = true; + } + } + break; + + case BRW_OPCODE_SEL: + if (i == 1) { + inst->src[i] = entry->src; + progress = true; + } else if (i == 0 && inst->src[1].file != IMM) { + inst->src[0] = inst->src[1]; + inst->src[1] = entry->src; + + /* If this was predicated, flipping operands means + * we also need to flip the predicate. + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NONE) { + inst->predicate_inverse = + !inst->predicate_inverse; + } + progress = true; + } + break; + + case SHADER_OPCODE_RCP: + /* The hardware doesn't do math on immediate values + * (because why are you doing that, seriously?), but + * the correct answer is to just constant fold it + * anyway. + */ + assert(i == 0); + if (inst->src[0].imm.f != 0.0f) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = entry->src; + inst->src[0].imm.f = 1.0f / inst->src[0].imm.f; + progress = true; + } + break; + + case FS_OPCODE_PULL_CONSTANT_LOAD: + inst->src[i] = entry->src; + progress = true; + break; + + default: + break; + } + } + + return progress; +} + /** @file brw_fs_copy_propagation.cpp * * Support for local copy propagation by walking the list of instructions @@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, foreach_list(entry_node, acp) { acp_entry *entry = (acp_entry *)entry_node; + if (try_constant_propagate(inst, entry)) + progress = true; + for (int i = 0; i < 3; i++) { if (try_copy_propagate(inst, i, entry)) progress = true; @@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx, ((inst->src[0].file == GRF && (inst->src[0].reg != inst->dst.reg || inst->src[0].reg_offset != inst->dst.reg_offset)) || - inst->src[0].file == UNIFORM) && + inst->src[0].file == UNIFORM || + inst->src[0].file == IMM) && inst->src[0].type == inst->dst.type && !inst->saturate && !inst->predicated && -- 2.30.2