X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_cse.cpp;h=accc037af8dead1c15a52a3f00a4f04bf1c3e4ac;hb=4064a6cd207811434e5400a613b3833fbda6b787;hp=2e65ef78548f2205d01f1748fd0f518a30a63a86;hpb=700bebb958e93f4d472c383de62ced9db8e64bec;p=mesa.git diff --git a/src/intel/compiler/brw_vec4_cse.cpp b/src/intel/compiler/brw_vec4_cse.cpp index 2e65ef78548..accc037af8d 100644 --- a/src/intel/compiler/brw_vec4_cse.cpp +++ b/src/intel/compiler/brw_vec4_cse.cpp @@ -104,6 +104,25 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b) return xs[0].equals(ys[0]) && ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) || (xs[2].equals(ys[1]) && xs[1].equals(ys[2]))); + } else if (a->opcode == BRW_OPCODE_MOV && + xs[0].file == IMM && + xs[0].type == BRW_REGISTER_TYPE_VF) { + src_reg tmp_x = xs[0]; + src_reg tmp_y = ys[0]; + + /* Smash out the values that are not part of the writemask. Otherwise + * the equals operator will fail due to mismatches in unused components. + */ + const unsigned ab_writemask = a->dst.writemask & b->dst.writemask; + const uint32_t mask = ((ab_writemask & WRITEMASK_X) ? 0x000000ff : 0) | + ((ab_writemask & WRITEMASK_Y) ? 0x0000ff00 : 0) | + ((ab_writemask & WRITEMASK_Z) ? 0x00ff0000 : 0) | + ((ab_writemask & WRITEMASK_W) ? 0xff000000 : 0); + + tmp_x.ud &= mask; + tmp_y.ud &= mask; + + return tmp_x.equals(tmp_y); } else if (!a->is_commutative()) { return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]); } else { @@ -112,6 +131,14 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b) } } +/** + * Checks if instructions match, exactly for sources, but loosely for + * destination writemasks. + * + * \param 'a' is the generating expression from the AEB entry. + * \param 'b' is the second occurrence of the expression that we're + * considering eliminating. + */ static bool instructions_match(vec4_instruction *a, vec4_instruction *b) { @@ -127,7 +154,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b) a->base_mrf == b->base_mrf && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && - a->dst.writemask == b->dst.writemask && + ((a->dst.writemask & b->dst.writemask) == a->dst.writemask) && a->force_writemask_all == b->force_writemask_all && a->size_written == b->size_written && a->exec_size == b->exec_size && @@ -136,7 +163,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b) } bool -vec4_visitor::opt_cse_local(bblock_t *block) +vec4_visitor::opt_cse_local(bblock_t *block, const vec4_live_variables &live) { bool progress = false; exec_list aeb; @@ -261,7 +288,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) * more -- a sure sign they'll fail operands_match(). */ if (src->file == VGRF) { - if (var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) { + if (live.var_range_end(var_from_reg(alloc, dst_reg(*src)), 8) < ip) { entry->remove(); ralloc_free(entry); break; @@ -282,15 +309,14 @@ bool vec4_visitor::opt_cse() { bool progress = false; - - calculate_live_intervals(); + const vec4_live_variables &live = live_analysis.require(); foreach_block (block, cfg) { - progress = opt_cse_local(block) || progress; + progress = opt_cse_local(block, live) || progress; } if (progress) - invalidate_live_intervals(); + invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES); return progress; }