From 36abb0c6918c89bdd5a155df21dbd7b4ee0a9dde Mon Sep 17 00:00:00 2001 From: Yevhenii Kolesnikov Date: Thu, 12 Mar 2020 19:42:37 +0200 Subject: [PATCH] intel/compiler: don't propagate cmp to add if add is saturated From the Kaby Lake PRM Vol. 7 "Assigning Conditional Flags": * Note that the [post condition signal] bits generated at the output of a compute are before the .sat. Paragraph about post_zero does not mention saturation, but testing it on actual GPUs shows that conditional modifiers are applied after saturation. * post_zero bit: This bit reflects whether the final result is zero after all the clamping, normalizing, or format conversion logic. For signed types we don't care about saturation: it won't change the result of conditional modifier. For floating and unsigned types there two special cases, when we can remove inst even if scan_inst is saturated: G and LE. Since conditional modifiers are just comparations against zero, saturating positive values to the upper limit never changes the result of comparation. For negative values: (sat(x) > 0) == (x > 0) --- false (sat(x) <= 0) == (x <= 0) --- true Closes: https://gitlab.freedesktop.org/mesa/mesa/issues/2610 Signed-off-by: Yevhenii Kolesnikov Reviewed-by: Matt Turner Part-of: --- .../compiler/brw_fs_cmod_propagation.cpp | 32 ++++++- .../compiler/test_fs_cmod_propagation.cpp | 89 +++++++++++++++++++ 2 files changed, 118 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index 743a435871c..415716d098d 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -93,18 +93,44 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, scan_inst->flags_written() != flags_written) goto not_match; - /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": + /* From the Kaby Lake PRM Vol. 7 "Assigning Conditional Flags": * * * Note that the [post condition signal] bits generated at * the output of a compute are before the .sat. * - * So we don't have to bail if scan_inst has saturate. + * Paragraph about post_zero does not mention saturation, but + * testing it on actual GPUs shows that conditional modifiers + * are applied after saturation. + * + * * post_zero bit: This bit reflects whether the final + * result is zero after all the clamping, normalizing, + * or format conversion logic. + * + * For signed types we don't care about saturation: it won't + * change the result of conditional modifier. + * + * For floating and unsigned types there two special cases, + * when we can remove inst even if scan_inst is saturated: G + * and LE. Since conditional modifiers are just comparations + * against zero, saturating positive values to the upper + * limit never changes the result of comparation. + * + * For negative values: + * (sat(x) > 0) == (x > 0) --- false + * (sat(x) <= 0) == (x <= 0) --- true */ - /* Otherwise, try propagating the conditional. */ const enum brw_conditional_mod cond = negate ? brw_swap_cmod(inst->conditional_mod) : inst->conditional_mod; + if (scan_inst->saturate && + (brw_reg_type_is_floating_point(scan_inst->dst.type) || + type_is_unsigned_int(scan_inst->dst.type)) && + (cond != BRW_CONDITIONAL_G && + cond != BRW_CONDITIONAL_LE)) + goto not_match; + + /* Otherwise, try propagating the conditional. */ if (scan_inst->can_do_cmod() && ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || scan_inst->conditional_mod == cond)) { diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index 0b634a371c8..af9489e2dbc 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -2381,3 +2381,92 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read) EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); EXPECT_EQ(1, instruction(block0, 1)->flag_subreg); } + +TEST_F(cmod_propagation_test, cmp_to_add_float_e) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg neg10(brw_imm_f(-10.0f)); + fs_reg pos10(brw_imm_f(10.0f)); + + bld.ADD(dest, src0, neg10)->saturate = true; + bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ); + + /* = Before = + * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f + * 1: cmp.z.f0.0(8) null:F, vgrf1:F, 10f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_to_add_float_g) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg neg10(brw_imm_f(-10.0f)); + fs_reg pos10(brw_imm_f(10.0f)); + + bld.ADD(dest, src0, neg10)->saturate = true; + bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G); + + /* = Before = + * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f + * 1: cmp.g.f0.0(8) null:F, vgrf1:F, 10f + * + * = After = + * 0: add.sat.g.f0.0(8) vgrf0:F, vgrf1:F, -10f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_to_add_float_le) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg neg10(brw_imm_f(-10.0f)); + fs_reg pos10(brw_imm_f(10.0f)); + + bld.ADD(dest, src0, neg10)->saturate = true; + bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE); + + /* = Before = + * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f + * 1: cmp.le.f0.0(8) null:F, vgrf1:F, 10f + * + * = After = + * 0: add.sat.le.f0.0(8) vgrf0:F, vgrf1:F, -10f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod); +} -- 2.30.2