From e13a5c7d676021b2d1030e3644db95eb1803e767 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 22 May 2019 12:32:03 -0700 Subject: [PATCH] intel/fs: Allow cmod propagation across reads and writes of different flags MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This also helps a later patch (intel/fs: Improve discard_if code generation) on about 200 shaders. v2: Document that other instruction sequences are also valid in subtract_merge_with_compare_intervening_mismatch_flag_write. Suggested by Caio. All Intel platforms had similar results. (Ice Lake shown) total instructions in shared programs: 17224438 -> 17224434 (<.01%) instructions in affected programs: 296 -> 292 (-1.35%) helped: 4 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 0.99% max: 1.92% x̄: 1.43% x̃: 1.40% 95% mean confidence interval for instructions value: -1.00 -1.00 95% mean confidence interval for instructions %-change: -2.04% -0.81% Instructions are helped. total cycles in shared programs: 361468455 -> 361468458 (<.01%) cycles in affected programs: 2862 -> 2865 (0.10%) helped: 2 HURT: 2 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 0.24% max: 0.39% x̄: 0.31% x̃: 0.31% HURT stats (abs) min: 3 max: 4 x̄: 3.50 x̃: 3 HURT stats (rel) min: 0.32% max: 0.70% x̄: 0.51% x̃: 0.51% 95% mean confidence interval for cycles value: -4.34 5.84 95% mean confidence interval for cycles %-change: -0.70% 0.90% Inconclusive result (value mean confidence interval includes 0). 
Reviewed-by: Caio Marcelo de Oliveira Filho Reviewed-by: Matt Turner --- .../compiler/brw_fs_cmod_propagation.cpp | 15 +- .../compiler/test_fs_cmod_propagation.cpp | 263 ++++++++++++++++++ 2 files changed, 272 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index ba4df592424..45ea9206014 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -114,10 +114,11 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, } not_match: - if (scan_inst->flags_written()) + if ((scan_inst->flags_written() & flags_written) != 0) break; - read_flag = read_flag || scan_inst->flags_read(devinfo); + read_flag = read_flag || + (scan_inst->flags_read(devinfo) & flags_written) != 0; } return false; @@ -180,10 +181,11 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, break; } - if (scan_inst->flags_written()) + if ((scan_inst->flags_written() & flags_written) != 0) break; - read_flag = read_flag || scan_inst->flags_read(devinfo); + read_flag = read_flag || + (scan_inst->flags_read(devinfo) & flags_written) != 0; } return false; @@ -423,10 +425,11 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) break; } - if (scan_inst->flags_written()) + if ((scan_inst->flags_written() & flags_written) != 0) break; - read_flag = read_flag || scan_inst->flags_read(devinfo); + read_flag = read_flag || + (scan_inst->flags_read(devinfo) & flags_written) != 0; } } diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index 89e2684eafb..218605000f4 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -277,6 +277,47 @@ TEST_F(cmod_propagation_test, intervening_flag_write) EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); } 
+TEST_F(cmod_propagation_test, intervening_mismatch_flag_write) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE) + ->flag_subreg = 1; /* writes f0.1, whereas the final CMP uses flag_subreg 0 */ + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add(8) dest src0 src1 + * 1: cmp.ge.f0.1(8) null src2 0.0f + * 2: cmp.ge.f0(8) null dest 0.0f + * + * = After = + * 0: add.ge.f0(8) dest src0 src1 + * 1: cmp.ge.f0.1(8) null src2 0.0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} + TEST_F(cmod_propagation_test, intervening_flag_read) { const fs_builder &bld = v->bld; @@ -316,6 +357,48 @@ TEST_F(cmod_propagation_test, intervening_flag_read) EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); } +TEST_F(cmod_propagation_test, intervening_mismatch_flag_read) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + bld.ADD(dest0, src0, src1); +
set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)) + ->flag_subreg = 1; /* predicated on f0.1, whereas the final CMP uses flag_subreg 0 */ + bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add(8) dest0 src0 src1 + * 1: (+f0.1) sel(8) dest1 src2 0.0f + * 2: cmp.ge.f0(8) null dest0 0.0f + * + * = After = + * 0: add.ge.f0(8) dest0 src0 src1 + * 1: (+f0.1) sel(8) dest1 src2 0.0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} + TEST_F(cmod_propagation_test, intervening_dest_write) { const fs_builder &bld = v->bld; @@ -976,6 +1059,96 @@ TEST_F(cmod_propagation_test, subtract_to_mismatch_flag) EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); } +TEST_F(cmod_propagation_test, + subtract_merge_with_compare_intervening_mismatch_flag_write) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + + bld.ADD(dest0, src0, negate(src1)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L) + ->flag_subreg = 1; /* writes f0.1, whereas the final CMP uses flag_subreg 0 */ + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest0:F src0:F -src1:F + * 1: cmp.l.f0.1(8) null:F src0:F src1:F + * 2: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0(8) dest0:F src0:F -src1:F + * 1: cmp.l.f0.1(8) null:F src0:F src1:F + * + * NOTE: Another perfectly valid after sequence would be: + * + * 0: add.l.f0.1(8) dest0:F src0:F
-src1:F + * 1: cmp.l.f0(8) null:F src0:F src1:F + * + * However, the optimization pass starts at the end of the basic block. + * Because of this, the cmp.l.f0 will always be chosen. If the pass + * changes its strategy, this test will also need to change. + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} + +TEST_F(cmod_propagation_test, + subtract_merge_with_compare_intervening_mismatch_flag_read) +{ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + + bld.ADD(dest0, src0, negate(src1)); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)) + ->flag_subreg = 1; /* predicated on f0.1, whereas the final CMP uses flag_subreg 0 */ + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L); + + /* = Before = + * 0: add(8) dest0:F src0:F -src1:F + * 1: (+f0.1) sel(8) dest1 src2 0.0f + * 2: cmp.l.f0(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0(8) dest0:F src0:F -src1:F + * 1: (+f0.1) sel(8) dest1 src2 0.0f + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); +
EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} + TEST_F(cmod_propagation_test, subtract_delete_compare_derp) { const fs_builder &bld = v->bld; @@ -1844,3 +2017,93 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value EXPECT_EQ(BRW_OPCODE_NOT, instruction(block0, 2)->opcode); EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 2)->conditional_mod); } + +TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_write) +{ + /* Exercise propagation of conditional modifier from a NOT instruction to + * another ALU instruction as performed by cmod_propagate_not. + */ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::uint_type); + fs_reg dest1 = v->vgrf(glsl_type::uint_type); + fs_reg src0 = v->vgrf(glsl_type::uint_type); + fs_reg src1 = v->vgrf(glsl_type::uint_type); + + bld.OR(dest0, src0, src1); + set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest1, src0, src1)) + ->flag_subreg = 1; /* writes f0.1, whereas the final NOT uses flag_subreg 0 */ + set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0)); + + /* = Before = + * + * 0: or(8) dest0 src0 src1 + * 1: or.z.f0.1(8) dest1 src0 src1 + * 2: not.nz.f0(8) null dest0 + * + * = After = + * 0: or.z.f0(8) dest0 src0 src1 + * 1: or.z.f0.1(8) dest1 src0 src1 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_Z,
instruction(block0, 1)->conditional_mod); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} + +TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read) +{ + /* Exercise propagation of conditional modifier from a NOT instruction to + * another ALU instruction as performed by cmod_propagate_not. + */ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::uint_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::uint_type); + fs_reg src1 = v->vgrf(glsl_type::uint_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + + bld.OR(dest0, src0, src1); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)) + ->flag_subreg = 1; /* predicated on f0.1, whereas the final NOT uses flag_subreg 0 */ + set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0)); + + /* = Before = + * + * 0: or(8) dest0 src0 src1 + * 1: (+f0.1) sel(8) dest1 src2 0.0f + * 2: not.nz.f0(8) null dest0 + * + * = After = + * 0: or.z.f0(8) dest0 src0 src1 + * 1: (+f0.1) sel(8) dest1 src2 0.0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} -- 2.30.2