From 8030cb75c18c129febd4ef0704e8c9b6142a629a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 22 May 2019 10:18:06 -0700 Subject: [PATCH] intel/fs: Fix flag_subreg handling in cmod propagation There were two errors. First, the pass could propagate conditional modifiers from an instruction that writes on flag register to an instruction that writes a different flag register. For example, cmp.nz.f0.0(16) null:F, vgrf6:F, vgrf5:F cmp.nz.f0.1(16) null:F, vgrf6:F, vgrf5:F could be come cmp.nz.f0.0(16) null:F, vgrf6:F, vgrf5:F Second, if an instruction writes f0.1 has it's condition propagated, the modified instruction will incorrectly write flag f0.0. For example, linterp(16) vgrf6:F, g2:F, attr0:F cmp.z.f0.1(16) null:F, vgrf6:F, vgrf5:F (-f0.1) discard_jump(16) (null):UD could become linterp.z.f0.0(16) vgrf6:F, g2:F, attr0:F (-f0.1) discard_jump(16) (null):UD None of these cases will occur currently. The only time we use f0.1 is for generating discard intrinsics. In all those cases, we generate a squence like: cmp.nz.f0.0(16) vgrf7:F, vgrf6:F, vgrf5:F (+f0.1) cmp.z(16) null:D, vgrf7:D, 0d (-f0.1) discard_jump(16) (null):UD Due to the mixed types and incompatible conditions, this sequence would never see any cmod propagation. The next patch will change this. No shader-db changes on any Intel platform. v2: Fix typo in comment in test case subtract_delete_compare_other_flag. Noticed by Caio. Reviewed-by: Caio Marcelo de Oliveira Filho Reviewed-by: Matt Turner --- .../compiler/brw_fs_cmod_propagation.cpp | 37 ++++ .../compiler/test_fs_cmod_propagation.cpp | 159 ++++++++++++++++++ 2 files changed, 196 insertions(+) diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index b430d4b2446..ba4df592424 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -53,6 +53,7 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, fs_inst *inst) { bool read_flag = false; + const unsigned flags_written = inst->flags_written(); foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->opcode == BRW_OPCODE_ADD && @@ -79,6 +80,17 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, goto not_match; } + /* If the scan instruction writes a different flag register than the + * instruction we're trying to propagate from, bail. + * + * FINISHME: The second part of the condition may be too strong. + * Perhaps (scan_inst->flags_written() & flags_written) != + * flags_written? + */ + if (scan_inst->flags_written() != 0 && + scan_inst->flags_written() != flags_written) + goto not_match; + /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": * * * Note that the [post condition signal] bits generated at @@ -130,6 +142,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, { const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod); bool read_flag = false; + const unsigned flags_written = inst->flags_written(); if (cond != BRW_CONDITIONAL_Z && cond != BRW_CONDITIONAL_NZ) return false; @@ -146,6 +159,17 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, scan_inst->exec_size != inst->exec_size) break; + /* If the scan instruction writes a different flag register than the + * instruction we're trying to propagate from, bail. + * + * FINISHME: The second part of the condition may be too strong. + * Perhaps (scan_inst->flags_written() & flags_written) != + * flags_written? + */ + if (scan_inst->flags_written() != 0 && + scan_inst->flags_written() != flags_written) + break; + if (scan_inst->can_do_cmod() && ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || scan_inst->conditional_mod == cond)) { @@ -231,9 +255,21 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) } bool read_flag = false; + const unsigned flags_written = inst->flags_written(); foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, inst->src[0], inst->size_read(0))) { + /* If the scan instruction writes a different flag register than + * the instruction we're trying to propagate from, bail. + * + * FINISHME: The second part of the condition may be too strong. + * Perhaps (scan_inst->flags_written() & flags_written) != + * flags_written? + */ + if (scan_inst->flags_written() != 0 && + scan_inst->flags_written() != flags_written) + break; + if (scan_inst->is_partial_write() || scan_inst->dst.offset != inst->src[0].offset || scan_inst->exec_size != inst->exec_size) @@ -380,6 +416,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || scan_inst->conditional_mod == cond)) { scan_inst->conditional_mod = cond; + scan_inst->flag_subreg = inst->flag_subreg; inst->remove(block); progress = true; } diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index 3aa3459caf2..89e2684eafb 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -140,6 +140,40 @@ TEST_F(cmod_propagation_test, basic) EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); } +TEST_F(cmod_propagation_test, basic_other_flag) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE) + ->flag_subreg = 1; + + /* = Before = + * + * 0: add(8) dest src0 src1 + * 1: cmp.ge.f0.1(8) null dest 0.0f + * + * = After = + * 0: add.ge.f0.1(8) dest src0 src1 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(1, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + TEST_F(cmod_propagation_test, cmp_nonzero) { const fs_builder &bld = v->bld; @@ -864,6 +898,84 @@ TEST_F(cmod_propagation_test, subtract_delete_compare) EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); } +TEST_F(cmod_propagation_test, subtract_delete_compare_other_flag) +{ + /* This test is the same as subtract_delete_compare but it explicitly used + * flag f0.1 for the subtraction and the comparison. + */ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + + set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1))) + ->flag_subreg = 1; + set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(dest1, src2)); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L) + ->flag_subreg = 1; + + /* = Before = + * 0: add.l.f0.1(8) dest0:F src0:F -src1:F + * 1: (+f0) mov(0) dest1:F src2:F + * 2: cmp.l.f0.1(8) null:F src0:F src1:F + * + * = After = + * 0: add.l.f0.1(8) dest:F src0:F -src1:F + * 1: (+f0) mov(0) dest1:F src2:F + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(1, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); +} + +TEST_F(cmod_propagation_test, subtract_to_mismatch_flag) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + + set_condmod(BRW_CONDITIONAL_L, bld.ADD(dest, src0, negate(src1))); + bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L) + ->flag_subreg = 1; + + /* = Before = + * 0: add.l.f0(8) dest0:F src0:F -src1:F + * 1: cmp.l.f0.1(8) null:F src0:F src1:F + * + * = After = + * No changes + */ + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); +} + TEST_F(cmod_propagation_test, subtract_delete_compare_derp) { const fs_builder &bld = v->bld; @@ -1643,6 +1755,53 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_compatible_value) EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); } +TEST_F(cmod_propagation_test, + not_to_or_intervening_flag_read_compatible_value_mismatch_flag) +{ + /* Exercise propagation of conditional modifier from a NOT instruction to + * another ALU instruction as performed by cmod_propagate_not. + */ + const fs_builder &bld = v->bld; + fs_reg dest0 = v->vgrf(glsl_type::uint_type); + fs_reg dest1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::uint_type); + fs_reg src1 = v->vgrf(glsl_type::uint_type); + fs_reg src2 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest0, src0, src1)) + ->flag_subreg = 1; + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0)); + + /* = Before = + * + * 0: or.z.f0.1(8) dest0 src0 src1 + * 1: (+f0) sel(8) dest1 src2 0.0f + * 2: not.nz.f0(8) null dest0 + * + * = After = + * No changes + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(1, instruction(block0, 0)->flag_subreg); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(BRW_OPCODE_NOT, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 2)->conditional_mod); + EXPECT_EQ(0, instruction(block0, 2)->flag_subreg); +} + TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value) { /* Exercise propagation of conditional modifier from a NOT instruction to -- 2.30.2