From: Ian Romanick Date: Mon, 11 Mar 2019 22:49:26 +0000 (-0700) Subject: intel/fs: Allow cmod propagation to instructions with saturate modifier X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a79570099bddecbd927b87c43e51b9a2c77c4833;p=mesa.git intel/fs: Allow cmod propagation to instructions with saturate modifier v2: Add unit tests. Suggested by Matt. All Intel GPUs had similar results. (Ice Lake shown) total instructions in shared programs: 17229441 -> 17228658 (<.01%) instructions in affected programs: 159574 -> 158791 (-0.49%) helped: 489 HURT: 0 helped stats (abs) min: 1 max: 5 x̄: 1.60 x̃: 1 helped stats (rel) min: 0.07% max: 2.70% x̄: 0.61% x̃: 0.59% 95% mean confidence interval for instructions value: -1.72 -1.48 95% mean confidence interval for instructions %-change: -0.64% -0.58% Instructions are helped. total cycles in shared programs: 360944149 -> 360937144 (<.01%) cycles in affected programs: 1072195 -> 1065190 (-0.65%) helped: 254 HURT: 27 helped stats (abs) min: 2 max: 234 x̄: 30.51 x̃: 9 helped stats (rel) min: 0.04% max: 8.99% x̄: 0.75% x̃: 0.24% HURT stats (abs) min: 2 max: 83 x̄: 27.56 x̃: 24 HURT stats (rel) min: 0.09% max: 3.79% x̄: 1.28% x̃: 1.16% 95% mean confidence interval for cycles value: -30.11 -19.75 95% mean confidence interval for cycles %-change: -0.70% -0.41% Cycles are helped. Reviewed-by: Matt Turner [v1] --- diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index c9725263929..b430d4b2446 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -313,14 +313,6 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) scan_inst->opcode == BRW_OPCODE_CMPN) break; - /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": - * - * * Note that the [post condition signal] bits generated at - * the output of a compute are before the .sat. - */ - if (scan_inst->saturate) - break; - /* From the Sky Lake PRM, Vol 2a, "Multiply": * * "When multiplying integer data types, if one of the sources @@ -338,11 +330,52 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) scan_inst->opcode == BRW_OPCODE_MUL) break; - /* Otherwise, try propagating the conditional. */ enum brw_conditional_mod cond = inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod) : inst->conditional_mod; + /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods": + * + * * Note that the [post condition signal] bits generated at + * the output of a compute are before the .sat. + * + * This limits the cases where we can propagate the conditional + * modifier. If scan_inst has a saturate modifier, then we can + * only propagate from inst if inst is 'scan_inst <= 0', + * 'scan_inst == 0', 'scan_inst != 0', or 'scan_inst > 0'. If + * inst is 'scan_inst == 0', the conditional modifier must be + * replace with LE. Likewise, if inst is 'scan_inst != 0', the + * conditional modifier must be replace with G. + * + * The only other cases are 'scan_inst < 0' (which is a + * contradiction) and 'scan_inst >= 0' (which is a tautology). + */ + if (scan_inst->saturate) { + if (scan_inst->dst.type != BRW_REGISTER_TYPE_F) + break; + + if (cond != BRW_CONDITIONAL_Z && + cond != BRW_CONDITIONAL_NZ && + cond != BRW_CONDITIONAL_LE && + cond != BRW_CONDITIONAL_G) + break; + + if (inst->opcode != BRW_OPCODE_MOV && + inst->opcode != BRW_OPCODE_CMP) + break; + + /* inst->src[1].is_zero() was tested before, but be safe + * against possible future changes in this code. + */ + assert(inst->opcode != BRW_OPCODE_CMP || inst->src[1].is_zero()); + + if (cond == BRW_CONDITIONAL_Z) + cond = BRW_CONDITIONAL_LE; + else if (cond == BRW_CONDITIONAL_NZ) + cond = BRW_CONDITIONAL_G; + } + + /* Otherwise, try propagating the conditional. */ if (scan_inst->can_do_cmod() && ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || scan_inst->conditional_mod == cond)) { diff --git a/src/intel/compiler/test_fs_cmod_propagation.cpp b/src/intel/compiler/test_fs_cmod_propagation.cpp index faabc236da3..6801f94de0c 100644 --- a/src/intel/compiler/test_fs_cmod_propagation.cpp +++ b/src/intel/compiler/test_fs_cmod_propagation.cpp @@ -38,6 +38,16 @@ public: struct brw_wm_prog_data *prog_data; struct gl_shader_program *shader_prog; fs_visitor *v; + + void test_positive_float_saturate_prop(enum brw_conditional_mod before, + enum brw_conditional_mod after, + enum opcode op); + + void test_negative_float_saturate_prop(enum brw_conditional_mod before, + enum opcode op); + + void test_negative_int_saturate_prop(enum brw_conditional_mod before, + enum opcode op); }; class cmod_propagation_fs_visitor : public fs_visitor @@ -921,3 +931,479 @@ TEST_F(cmod_propagation_test, signed_unsigned_comparison_mismatch) EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 1)->conditional_mod); } + +void +cmod_propagation_test::test_positive_float_saturate_prop(enum brw_conditional_mod before, + enum brw_conditional_mod after, + enum opcode op) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + bld.ADD(dest, src0, src1)->saturate = true; + + assert(op == BRW_OPCODE_CMP || op == BRW_OPCODE_MOV); + if (op == BRW_OPCODE_CMP) + bld.CMP(bld.null_reg_f(), dest, zero, before); + else + bld.MOV(bld.null_reg_f(), dest)->conditional_mod = before; + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_EQ(after, instruction(block0, 0)->conditional_mod); +} + +void +cmod_propagation_test::test_negative_float_saturate_prop(enum brw_conditional_mod before, + enum opcode op) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + fs_reg zero(brw_imm_f(0.0f)); + bld.ADD(dest, src0, src1)->saturate = true; + + assert(op == BRW_OPCODE_CMP || op == BRW_OPCODE_MOV); + if (op == BRW_OPCODE_CMP) + bld.CMP(bld.null_reg_f(), dest, zero, before); + else + bld.MOV(bld.null_reg_f(), dest)->conditional_mod = before; + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(op, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); + EXPECT_EQ(before, instruction(block0, 1)->conditional_mod); +} + +void +cmod_propagation_test::test_negative_int_saturate_prop(enum brw_conditional_mod before, + enum opcode op) +{ + const fs_builder &bld = v->bld; + fs_reg dest = v->vgrf(glsl_type::int_type); + fs_reg src0 = v->vgrf(glsl_type::int_type); + fs_reg src1 = v->vgrf(glsl_type::int_type); + fs_reg zero(brw_imm_d(0)); + bld.ADD(dest, src0, src1)->saturate = true; + + assert(op == BRW_OPCODE_CMP || op == BRW_OPCODE_MOV); + if (op == BRW_OPCODE_CMP) + bld.CMP(bld.null_reg_d(), dest, zero, before); + else + bld.MOV(bld.null_reg_d(), dest)->conditional_mod = before; + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(op, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); + EXPECT_EQ(before, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, float_saturate_nz_cmp) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) != 0) == (x > 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.nz.f0(8) null dest 0.0f + * + * = After = + * 0: add.sat.g.f0(8) dest src0 src1 + */ + test_positive_float_saturate_prop(BRW_CONDITIONAL_NZ, BRW_CONDITIONAL_G, + BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, float_saturate_nz_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) != 0) == (x > 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.nz.f0(8) null dest + * + * = After = + * 0: add.sat.g.f0(8) dest src0 src1 + */ + test_positive_float_saturate_prop(BRW_CONDITIONAL_NZ, BRW_CONDITIONAL_G, + BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, float_saturate_z_cmp) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) == 0) == (x <= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.z.f0(8) null dest 0.0f + * + * = After = + * 0: add.sat.le.f0(8) dest src0 src1 + */ + test_positive_float_saturate_prop(BRW_CONDITIONAL_Z, BRW_CONDITIONAL_LE, + BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, float_saturate_z_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) == 0) == (x <= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.z.f0(8) null dest + * + * = After = + * 0: add.sat.le.f0(8) dest src0 src1 + */ +#if 1 + /* cmod propagation bails on every MOV except MOV.NZ. */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_Z, BRW_OPCODE_MOV); +#else + test_positive_float_saturate_prop(BRW_CONDITIONAL_Z, BRW_CONDITIONAL_LE, + BRW_OPCODE_MOV); +#endif +} + +TEST_F(cmod_propagation_test, float_saturate_g_cmp) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) > 0) == (x > 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.g.f0(8) null dest 0.0f + * + * = After = + * 0: add.sat.g.f0(8) dest src0 src1 + */ + test_positive_float_saturate_prop(BRW_CONDITIONAL_G, BRW_CONDITIONAL_G, + BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, float_saturate_g_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) > 0) == (x > 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.g.f0(8) null dest + * + * = After = + * 0: add.sat.g.f0(8) dest src0 src1 + */ +#if 1 + /* cmod propagation bails on every MOV except MOV.NZ. */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_G, BRW_OPCODE_MOV); +#else + test_positive_float_saturate_prop(BRW_CONDITIONAL_G, BRW_CONDITIONAL_G, + BRW_OPCODE_MOV); +#endif +} + +TEST_F(cmod_propagation_test, float_saturate_le_cmp) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) <= 0) == (x <= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.le.f0(8) null dest 0.0f + * + * = After = + * 0: add.sat.le.f0(8) dest src0 src1 + */ + test_positive_float_saturate_prop(BRW_CONDITIONAL_LE, BRW_CONDITIONAL_LE, + BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, float_saturate_le_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) <= 0) == (x <= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.le.f0(8) null dest + * + * = After = + * 0: add.sat.le.f0(8) dest src0 src1 + */ +#if 1 + /* cmod propagation bails on every MOV except MOV.NZ. */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_LE, BRW_OPCODE_MOV); +#else + test_positive_float_saturate_prop(BRW_CONDITIONAL_LE, BRW_CONDITIONAL_LE, + BRW_OPCODE_MOV); +#endif +} + +TEST_F(cmod_propagation_test, float_saturate_l_cmp) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. There is no before / after equivalence for (sat(x) < 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.l.f0(8) null dest 0.0f + * + * = After = + * No change + */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_CMP); +} + +#if 0 +TEST_F(cmod_propagation_test, float_saturate_l_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. There is no before / after equivalence for (sat(x) < 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.l.f0(8) null dest 0.0f + * + * = After = + * No change + */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_MOV); +} +#endif + +TEST_F(cmod_propagation_test, float_saturate_ge_cmp) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. There is no before / after equivalence for (sat(x) >= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.ge.f0(8) null dest 0.0f + * + * = After = + * No change + */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, float_saturate_ge_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. There is no before / after equivalence for (sat(x) >= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.ge.f0(8) null dest 0.0f + * + * = After = + * No change + */ + test_negative_float_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, int_saturate_nz_cmp) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.nz.f0(8) null dest 0 + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_NZ, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, int_saturate_nz_mov) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.nz.f0(8) null dest + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_NZ, BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, int_saturate_z_cmp) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.z.f0(8) null dest 0 + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_Z, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, int_saturate_z_mov) +{ + /* With the saturate modifier, the comparison happens before clamping to + * [0, 1]. (sat(x) == 0) == (x <= 0). + * + * = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.z.f0(8) null dest + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_Z, BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, int_saturate_g_cmp) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.g.f0(8) null dest 0 + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_G, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, int_saturate_g_mov) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.g.f0(8) null dest + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_G, BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, int_saturate_le_cmp) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.le.f0(8) null dest 0 + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_LE, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, int_saturate_le_mov) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.le.f0(8) null dest + * + * = After = + * No change. + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_LE, BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, int_saturate_l_cmp) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.l.f0(8) null dest 0 + * + * = After = + * No change + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, int_saturate_l_mov) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.l.f0(8) null dest 0 + * + * = After = + * No change + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_L, BRW_OPCODE_MOV); +} + +TEST_F(cmod_propagation_test, int_saturate_ge_cmp) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: cmp.ge.f0(8) null dest 0 + * + * = After = + * No change + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_CMP); +} + +TEST_F(cmod_propagation_test, int_saturate_ge_mov) +{ + /* = Before = + * + * 0: add.sat(8) dest src0 src1 + * 1: mov.ge.f0(8) null dest + * + * = After = + * No change + */ + test_negative_int_saturate_prop(BRW_CONDITIONAL_GE, BRW_OPCODE_MOV); +}