From: Ian Romanick Date: Fri, 9 Mar 2018 21:45:01 +0000 (-0800) Subject: i965/fs: Propagate conditional modifiers from compares to adds X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=020b0055e7a085a6a8c961ad12ce94e58606a1ae;p=mesa.git i965/fs: Propagate conditional modifiers from compares to adds The math inside the add and the cmp in this instruction sequence is the same. We can utilize this to eliminate the compare. add(8) g5<1>F g2<8,8,1>F g64.5<0,1,0>F { align1 1Q compacted }; cmp.z.f0(8) null<1>F g2<8,8,1>F -g64.5<0,1,0>F { align1 1Q switch }; (-f0) sel(8) g8<1>F (abs)g5<8,8,1>F 3e-37F { align1 1Q }; This is reduced to: add.z.f0(8) g5<1>F g2<8,8,1>F g64.5<0,1,0>F { align1 1Q compacted }; (-f0) sel(8) g8<1>F (abs)g5<8,8,1>F 3e-37F { align1 1Q }; This optimization pass could do even better. The nature of converting vectorized code from the GLSL front end to scalar code in NIR results in sequences like: add(8) g7<1>F g4<8,8,1>F g64.5<0,1,0>F { align1 1Q compacted }; add(8) g6<1>F g3<8,8,1>F g64.5<0,1,0>F { align1 1Q compacted }; add(8) g5<1>F g2<8,8,1>F g64.5<0,1,0>F { align1 1Q compacted }; cmp.z.f0(8) null<1>F g2<8,8,1>F -g64.5<0,1,0>F { align1 1Q switch }; (-f0) sel(8) g8<1>F (abs)g5<8,8,1>F 3e-37F { align1 1Q }; cmp.z.f0(8) null<1>F g3<8,8,1>F -g64.5<0,1,0>F { align1 1Q switch }; (-f0) sel(8) g10<1>F (abs)g6<8,8,1>F 3e-37F { align1 1Q }; cmp.z.f0(8) null<1>F g4<8,8,1>F -g64.5<0,1,0>F { align1 1Q switch }; (-f0) sel(8) g12<1>F (abs)g7<8,8,1>F 3e-37F { align1 1Q }; In this sequence, only the first cmp.z is removed. With different scheduling, all 3 could get removed. 
Skylake total instructions in shared programs: 14407009 -> 14400173 (-0.05%) instructions in affected programs: 1307274 -> 1300438 (-0.52%) helped: 4880 HURT: 0 helped stats (abs) min: 1 max: 33 x̄: 1.40 x̃: 1 helped stats (rel) min: 0.03% max: 8.70% x̄: 0.70% x̃: 0.52% 95% mean confidence interval for instructions value: -1.45 -1.35 95% mean confidence interval for instructions %-change: -0.72% -0.69% Instructions are helped. total cycles in shared programs: 532943169 -> 532923528 (<.01%) cycles in affected programs: 14065798 -> 14046157 (-0.14%) helped: 2703 HURT: 339 helped stats (abs) min: 1 max: 1062 x̄: 12.27 x̃: 2 helped stats (rel) min: <.01% max: 28.72% x̄: 0.38% x̃: 0.21% HURT stats (abs) min: 1 max: 739 x̄: 39.86 x̃: 12 HURT stats (rel) min: 0.02% max: 27.69% x̄: 1.38% x̃: 0.41% 95% mean confidence interval for cycles value: -8.66 -4.26 95% mean confidence interval for cycles %-change: -0.24% -0.14% Cycles are helped. LOST: 0 GAINED: 1 Broadwell total instructions in shared programs: 14719636 -> 14712949 (-0.05%) instructions in affected programs: 1288188 -> 1281501 (-0.52%) helped: 4845 HURT: 0 helped stats (abs) min: 1 max: 33 x̄: 1.38 x̃: 1 helped stats (rel) min: 0.03% max: 8.00% x̄: 0.70% x̃: 0.52% 95% mean confidence interval for instructions value: -1.43 -1.33 95% mean confidence interval for instructions %-change: -0.72% -0.68% Instructions are helped. total cycles in shared programs: 559599253 -> 559581699 (<.01%) cycles in affected programs: 13315565 -> 13298011 (-0.13%) helped: 2600 HURT: 269 helped stats (abs) min: 1 max: 2128 x̄: 12.24 x̃: 2 helped stats (rel) min: <.01% max: 23.95% x̄: 0.41% x̃: 0.20% HURT stats (abs) min: 1 max: 790 x̄: 53.07 x̃: 20 HURT stats (rel) min: 0.02% max: 15.96% x̄: 1.55% x̃: 0.75% 95% mean confidence interval for cycles value: -8.47 -3.77 95% mean confidence interval for cycles %-change: -0.27% -0.18% Cycles are helped. 
LOST: 0 GAINED: 8 Haswell total instructions in shared programs: 12978609 -> 12973483 (-0.04%) instructions in affected programs: 932921 -> 927795 (-0.55%) helped: 3480 HURT: 0 helped stats (abs) min: 1 max: 33 x̄: 1.47 x̃: 1 helped stats (rel) min: 0.03% max: 7.84% x̄: 0.78% x̃: 0.58% 95% mean confidence interval for instructions value: -1.53 -1.42 95% mean confidence interval for instructions %-change: -0.80% -0.75% Instructions are helped. total cycles in shared programs: 410270788 -> 410250531 (<.01%) cycles in affected programs: 10986161 -> 10965904 (-0.18%) helped: 2087 HURT: 254 helped stats (abs) min: 1 max: 2672 x̄: 14.63 x̃: 4 helped stats (rel) min: <.01% max: 39.61% x̄: 0.42% x̃: 0.21% HURT stats (abs) min: 1 max: 519 x̄: 40.49 x̃: 16 HURT stats (rel) min: 0.01% max: 12.83% x̄: 1.20% x̃: 0.47% 95% mean confidence interval for cycles value: -12.82 -4.49 95% mean confidence interval for cycles %-change: -0.31% -0.18% Cycles are helped. LOST: 0 GAINED: 5 Ivy Bridge total instructions in shared programs: 11686082 -> 11681548 (-0.04%) instructions in affected programs: 937696 -> 933162 (-0.48%) helped: 3150 HURT: 0 helped stats (abs) min: 1 max: 33 x̄: 1.44 x̃: 1 helped stats (rel) min: 0.03% max: 7.84% x̄: 0.69% x̃: 0.49% 95% mean confidence interval for instructions value: -1.49 -1.38 95% mean confidence interval for instructions %-change: -0.71% -0.67% Instructions are helped. total cycles in shared programs: 257514962 -> 257492471 (<.01%) cycles in affected programs: 11524149 -> 11501658 (-0.20%) helped: 1970 HURT: 239 helped stats (abs) min: 1 max: 3525 x̄: 17.48 x̃: 3 helped stats (rel) min: <.01% max: 49.60% x̄: 0.46% x̃: 0.17% HURT stats (abs) min: 1 max: 1358 x̄: 50.00 x̃: 15 HURT stats (rel) min: 0.02% max: 59.88% x̄: 1.84% x̃: 0.65% 95% mean confidence interval for cycles value: -17.01 -3.35 95% mean confidence interval for cycles %-change: -0.33% -0.08% Cycles are helped. 
LOST: 9 GAINED: 1 Sandy Bridge total instructions in shared programs: 10432841 -> 10429893 (-0.03%) instructions in affected programs: 685071 -> 682123 (-0.43%) helped: 2453 HURT: 0 helped stats (abs) min: 1 max: 9 x̄: 1.20 x̃: 1 helped stats (rel) min: 0.02% max: 7.55% x̄: 0.64% x̃: 0.46% 95% mean confidence interval for instructions value: -1.23 -1.17 95% mean confidence interval for instructions %-change: -0.67% -0.62% Instructions are helped. total cycles in shared programs: 146133660 -> 146134195 (<.01%) cycles in affected programs: 3991634 -> 3992169 (0.01%) helped: 1237 HURT: 153 helped stats (abs) min: 1 max: 2853 x̄: 6.93 x̃: 2 helped stats (rel) min: <.01% max: 29.00% x̄: 0.24% x̃: 0.14% HURT stats (abs) min: 1 max: 1740 x̄: 59.56 x̃: 12 HURT stats (rel) min: 0.03% max: 78.98% x̄: 1.96% x̃: 0.42% 95% mean confidence interval for cycles value: -5.13 5.90 95% mean confidence interval for cycles %-change: -0.17% 0.16% Inconclusive result (value mean confidence interval includes 0). LOST: 0 GAINED: 1 GM45 and Iron Lake had similar results (GM45 shown): total instructions in shared programs: 4800332 -> 4798380 (-0.04%) instructions in affected programs: 565995 -> 564043 (-0.34%) helped: 1451 HURT: 0 helped stats (abs) min: 1 max: 20 x̄: 1.35 x̃: 1 helped stats (rel) min: 0.05% max: 5.26% x̄: 0.47% x̃: 0.31% 95% mean confidence interval for instructions value: -1.40 -1.29 95% mean confidence interval for instructions %-change: -0.50% -0.45% Instructions are helped. total cycles in shared programs: 122032318 -> 122027798 (<.01%) cycles in affected programs: 8334868 -> 8330348 (-0.05%) helped: 1029 HURT: 1 helped stats (abs) min: 2 max: 40 x̄: 4.43 x̃: 2 helped stats (rel) min: <.01% max: 1.83% x̄: 0.09% x̃: 0.04% HURT stats (abs) min: 38 max: 38 x̄: 38.00 x̃: 38 HURT stats (rel) min: 0.25% max: 0.25% x̄: 0.25% x̃: 0.25% 95% mean confidence interval for cycles value: -4.70 -4.08 95% mean confidence interval for cycles %-change: -0.09% -0.08% Cycles are helped. 
Signed-off-by: Ian Romanick Reviewed-by: Alejandro Piñeiro Reviewed-by: Matt Turner --- diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index b995a51d3c3..462e51d504e 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -63,8 +63,14 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) inst->predicate != BRW_PREDICATE_NONE || !inst->dst.is_null() || (inst->src[0].file != VGRF && inst->src[0].file != ATTR && - inst->src[0].file != UNIFORM) || - inst->src[0].abs) + inst->src[0].file != UNIFORM)) + continue; + + /* An ABS source modifier can only be handled when processing a compare + * with a value other than zero. + */ + if (inst->src[0].abs && + (inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero())) continue; /* Only an AND.NZ can be propagated. Many AND.Z instructions are @@ -80,15 +86,68 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) !inst->src[0].negate)) continue; - if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) - continue; - if (inst->opcode == BRW_OPCODE_MOV && inst->conditional_mod != BRW_CONDITIONAL_NZ) continue; bool read_flag = false; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { + /* A CMP with a second source of zero can match with anything. A CMP + * with a second source that is not zero can only match with an ADD + * instruction. + */ + if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { + bool negate; + + if (scan_inst->opcode != BRW_OPCODE_ADD) + goto not_match; + + /* A CMP is basically a subtraction. The result of the + * subtraction must be the same as the result of the addition. + * This means that one of the operands must be negated. So (a + + * b) vs (a == -b) or (a + -b) vs (a == b). 
+ */ + if ((inst->src[0].equals(scan_inst->src[0]) && + inst->src[1].negative_equals(scan_inst->src[1])) || + (inst->src[0].equals(scan_inst->src[1]) && + inst->src[1].negative_equals(scan_inst->src[0]))) { + negate = false; + } else if ((inst->src[0].negative_equals(scan_inst->src[0]) && + inst->src[1].equals(scan_inst->src[1])) || + (inst->src[0].negative_equals(scan_inst->src[1]) && + inst->src[1].equals(scan_inst->src[0]))) { + negate = true; + } else { + goto not_match; + } + + if (scan_inst->is_partial_write() || + scan_inst->exec_size != inst->exec_size) + goto not_match; + + /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": + * + * * Note that the [post condition signal] bits generated at + * the output of a compute are before the .sat. + * + * So we don't have to bail if scan_inst has saturate. + */ + + /* Otherwise, try propagating the conditional. */ + const enum brw_conditional_mod cond = + negate ? brw_swap_cmod(inst->conditional_mod) + : inst->conditional_mod; + + if (scan_inst->can_do_cmod() && + ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || + scan_inst->conditional_mod == cond)) { + scan_inst->conditional_mod = cond; + inst->remove(block); + progress = true; + } + break; + } + if (regions_overlap(scan_inst->dst, scan_inst->size_written, inst->src[0], inst->size_read(0))) { if (scan_inst->is_partial_write() || @@ -183,6 +242,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) break; } + not_match: if (scan_inst->flags_written()) break; diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index a97e374f74e..659fbb2d1bc 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -554,3 +554,338 @@ TEST_F(cmod_propagation_test, andz_one) EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); } + 
+TEST_F(cmod_propagation_test, add_not_merge_with_compare) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* The addition and the implicit subtraction in the compare do not compute + * related values. + * + * = Before = + * 0: add(8) dest:F src0:F src1:F + * 1: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * (no changes) + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, subtract_merge_with_compare) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest, src0, negate(src1)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest:F src0:F -src1:F + * 1: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0(8) dest:F src0:F -src1:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, 
subtract_immediate_merge_with_compare) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg one(brw_imm_f(1.0f)); + fs_reg negative_one(brw_imm_f(-1.0f)); + + bld.ADD(dest, src0, negative_one); + bld.CMP(bld.null_reg_f(), src0, one, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: add(8) dest:F src0:F -1.0f + * 1: cmp.nz.f0(8) null:F src0:F 1.0f + * + * = After = + * 0: add.nz.f0(8) dest:F src0:F -1.0f + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, subtract_merge_with_compare_intervening_add) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest0, src0, negate(src1)); + bld.ADD(dest1, src0, src1); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest0:F src0:F -src1:F + * 1: add(8) dest1:F src0:F src1:F + * 2: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0(8) dest0:F src0:F -src1:F + * 1: add(8) dest1:F src0:F src1:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode); + 
EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_partial_write) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest0, src0, negate(src1)); + set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, src0, negate(src1))); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest0:F src0:F -src1:F + * 1: (+f0) add(8) dest1:F src0:F -src1:F + * 2: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * (no changes) + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, subtract_not_merge_with_compare_intervening_add) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest0, src0, negate(src1)); + set_condmod(BRW_CONDITIONAL_EQ, bld.ADD(dest1, src0, src1)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest0:F src0:F -src1:F + * 1: add.z.f0(8) dest1:F src0:F src1:F + * 2: cmp.l.f0(8) null:F src0:F 
src1:F + * + * = After = + * (no changes) + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, add_merge_with_compare) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src0, negate(src1), BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest:F src0:F src1:F + * 1: cmp.l.f0(8) null:F src0:F -src1:F + * + * = After = + * 0: add.l.f0(8) dest:F src0:F src1:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, negative_subtract_merge_with_compare) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.ADD(dest, src1, negate(src0)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* The result of the subtract is the negation of the result of the + * implicit subtract in 
the compare, so the condition must change. + * + * = Before = + * 0: add(8) dest:F src1:F -src0:F + * 1: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.g.f0(8) dest:F src1:F -src0:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, subtract_delete_compare) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + + set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1))); + set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add.l.f0(8) dest:F src0:F -src1:F + * 1: (+f0) mov(8) dest1:F src2:F + * 2: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0(8) dest:F src0:F -src1:F + * 1: (+f0) mov(8) dest1:F src2:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); +} + +TEST_F(cmod_propagation_test, subtract_delete_compare_derp) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg 
dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + + set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest0, src0, negate(src1))); + set_predicate(BRW_PREDICATE_NORMAL, bld.ADD(dest1, negate(src0), src1)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add.l.f0(8) dest0:F src0:F -src1:F + * 1: (+f0) add(8) dest1:F -src0:F src1:F + * 2: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0(8) dest0:F src0:F -src1:F + * 1: (+f0) add(8) dest1:F -src0:F src1:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); +}