From 32b7ba66b0156d9fd40b059f20da79a74451f7fd Mon Sep 17 00:00:00 2001 From: Yevhenii Kolesnikov Date: Fri, 3 Jan 2020 16:37:00 +0200 Subject: [PATCH] intel/compiler: fix cmod propagation optimisations Knowing following: - CMP writes to flag register the result of applying cmod to the `src0 - src1`. After that it stores the same value to dst. Other instructions first store their result to dst, and then store cmod(dst) to the flag register. - inst is either CMP or MOV - inst->dst is null - inst->src[0] overlaps with scan_inst->dst - inst->src[1] is zero - scan_inst wrote to a flag register There can be three possible paths: - scan_inst is CMP: Considering that src0 is either 0x0 (false), or 0xffffffff (true), and src1 is 0x0: - If inst's cmod is NZ, we can always remove scan_inst: NZ is invariant for false and true. This holds even if src0 is NaN: .nz is the only cmod, that returns true for NaN. - .g is invariant if src0 has a UD type - .l is invariant if src0 has a D type - scan_inst and inst have the same cmod: If scan_inst is anything than CMP, it already wrote the appropriate value to the flag register. - else: We can change cmod of scan_inst to that of inst, and remove inst. It is valid as long as we make sure that no instruction uses the flag register between scan_inst and inst. Nine new cmod_propagation unit tests: - cmp_cmpnz - cmp_cmpg - plnnz_cmpnz - plnnz_cmpz (*) - plnnz_sel_cmpz - cmp_cmpg_D - cmp_cmpg_UD (*) - cmp_cmpl_D (*) - cmp_cmpl_UD (*) this would fail without changes to brw_fs_cmod_propagation. This fixes optimisation that used to be illegal (see issue #2154) = Before = 0: linterp.z.f0.0(8) vgrf0:F, g2:F, attr0<0>:F 1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f = After = 0: linterp.z.f0.0(8) vgrf0:F, g2:F, attr0<0>:F Now it is optimised as such (note change of cmod in line 0): = Before = 0: linterp.z.f0.0(8) vgrf0:F, g2:F, attr0<0>:F 1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f = After = 0: linterp.nz.f0.0(8) vgrf0:F, g2:F, attr0<0>:F No shaderdb changes Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2154 Signed-off-by: Yevhenii Kolesnikov Reviewed-by: Matt Turner Tested-by: Marge Bot Part-of: --- .../compiler/brw_fs_cmod_propagation.cpp | 70 ++++- .../compiler/test_fs_cmod_propagation.cpp | 275 ++++++++++++++++++ 2 files changed, 336 insertions(+), 9 deletions(-) diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index 0b02b336d69..743a435871c 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -328,17 +328,69 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) } } - /* If the instruction generating inst's source also wrote the - * flag, and inst is doing a simple .nz comparison, then inst - * is redundant - the appropriate value is already in the flag - * register. Delete inst. + /* Knowing following: + * - CMP writes to flag register the result of + * applying cmod to the `src0 - src1`. + * After that it stores the same value to dst. + * Other instructions first store their result to + * dst, and then store cmod(dst) to the flag + * register. + * - inst is either CMP or MOV + * - inst->dst is null + * - inst->src[0] overlaps with scan_inst->dst + * - inst->src[1] is zero + * - scan_inst wrote to a flag register + * + * There can be three possible paths: + * + * - scan_inst is CMP: + * + * Considering that src0 is either 0x0 (false), + * or 0xffffffff (true), and src1 is 0x0: + * + * - If inst's cmod is NZ, we can always remove + * scan_inst: NZ is invariant for false and true. This + * holds even if src0 is NaN: .nz is the only cmod, + * that returns true for NaN. + * + * - .g is invariant if src0 has a UD type + * + * - .l is invariant if src0 has a D type + * + * - scan_inst and inst have the same cmod: + * + * If scan_inst is anything than CMP, it already + * wrote the appropriate value to the flag register. + * + * - else: + * + * We can change cmod of scan_inst to that of inst, + * and remove inst. It is valid as long as we make + * sure that no instruction uses the flag register + * between scan_inst and inst. */ - if (inst->conditional_mod == BRW_CONDITIONAL_NZ && - !inst->src[0].negate && + if (!inst->src[0].negate && scan_inst->flags_written()) { - inst->remove(block); - progress = true; - break; + if (scan_inst->opcode == BRW_OPCODE_CMP) { + if ((inst->conditional_mod == BRW_CONDITIONAL_NZ) || + (inst->conditional_mod == BRW_CONDITIONAL_G && + inst->src[0].type == BRW_REGISTER_TYPE_UD) || + (inst->conditional_mod == BRW_CONDITIONAL_L && + inst->src[0].type == BRW_REGISTER_TYPE_D)) { + inst->remove(block); + progress = true; + break; + } + } else if (scan_inst->conditional_mod == inst->conditional_mod) { + inst->remove(block); + progress = true; + break; + } else if (!read_flag) { + scan_inst->conditional_mod = inst->conditional_mod; + inst->remove(block); + progress = true; + break; + } } /* The conditional mod of the CMP/CMPN instructions behaves diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index 1fa2da2b619..0b634a371c8 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -646,6 +646,281 @@ TEST_F(cmod_propagation_test, andnz_non_one) EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); } +TEST_F(cmod_propagation_test, cmp_cmpnz) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0)); + + bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: cmp.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f + * 1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f + * + * = After = + * 0: cmp.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_cmpg) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0)); + + bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_G); + + /* = Before = + * 0: cmp.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f + * 1: cmp.g.f0.0(8) null:F, vgrf0:F, 0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, plnnz_cmpnz) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0)); + + set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero)); + bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: pln.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f + * 1: cmp.nz.f0.0(8) null:F, vgrf0:F, 0f + * + * = After = + * 0: pln.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_PLN, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, plnnz_cmpz) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0)); + + set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero)); + bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z); + + /* = Before = + * 0: pln.nz.f0.0(8) vgrf0:F, vgrf1:F, 0f + * 1: cmp.z.f0.0(8) null:F, vgrf0:F, 0f + * + * = After = + * 0: pln.z.f0.0(8) vgrf0:F, vgrf1:F, 0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_PLN, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, plnnz_sel_cmpz) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0)); + + set_condmod(BRW_CONDITIONAL_NZ, bld.PLN(dst0, src0, zero)); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dst1, src0, zero)); + bld.CMP(bld.null_reg_f(), dst0, zero, BRW_CONDITIONAL_Z); + + /* = Before = + * 0: pln.nz.f0.0(8) vgrf0:F, vgrf2:F, 0f + * 1: (+f0.0) sel(8) vgrf1:F, vgrf2:F, 0f + * 2: cmp.z.f0.0(8) null:F, vgrf0:F, 0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_PLN, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_cmpg_D) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::int_type); + fs_reg zero(brw_imm_d(0)); + fs_reg one(brw_imm_d(1)); + + bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_G); + + /* = Before = + * 0: cmp.nz.f0.0(8) vgrf0:D, vgrf1:D, 0d + * 1: cmp.g.f0.0(8) null:D, vgrf0:D, 0d + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_cmpg_UD) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::uint_type); + fs_reg src0 = v->vgrf(glsl_type::uint_type); + fs_reg zero(brw_imm_ud(0)); + + bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_G); + + /* = Before = + * 0: cmp.nz.f0.0(8) vgrf0:UD, vgrf1:UD, 0u + * 1: cmp.g.f0.0(8) null:UD, vgrf0:UD, 0u + * + * = After = + * 0: cmp.nz.f0.0(8) vgrf0:UD, vgrf1:UD, 0u + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_cmpl_D) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::int_type); + fs_reg zero(brw_imm_d(0)); + + bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_d(), dst0, zero, BRW_CONDITIONAL_L); + + /* = Before = + * 0: cmp.nz.f0.0(8) vgrf0:D, vgrf1:D, 0d + * 1: cmp.l.f0.0(8) null:D, vgrf0:D, 0d + * + * = After = + * 0: cmp.nz.f0.0(8) vgrf0:D, vgrf1:D, 0d + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_cmpl_UD) +{ + const fs_builder &bld = v->bld; + + fs_reg dst0 = v->vgrf(glsl_type::uint_type); + fs_reg src0 = v->vgrf(glsl_type::uint_type); + fs_reg zero(brw_imm_ud(0)); + + bld.CMP(dst0, src0, zero, BRW_CONDITIONAL_NZ); + bld.CMP(bld.null_reg_ud(), dst0, zero, BRW_CONDITIONAL_L); + + /* = Before = + * 0: cmp.nz.f0.0(8) vgrf0:UD, vgrf1:UD, 0u + * 1: cmp.l.f0.0(8) null:UD, vgrf0:UD, 0u + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod); +} + TEST_F(cmod_propagation_test, andz_one) { const fs_builder &bld = v->bld; -- 2.30.2