From: Matt Turner Date: Wed, 28 Jan 2015 06:46:22 +0000 (-0800) Subject: i965/fs: Allow saturate propagation to propagate negations into MULs. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7b6113bc2d3493f5b7dcf00c51eb1e90a477d067;p=mesa.git i965/fs: Allow saturate propagation to propagate negations into MULs. Allows us to transform "mul res src0 src1; mov.sat dst -res" into "mul.sat dst src0 -src1". instructions in affected programs: 45246 -> 45054 (-0.42%); helped: 162. Reviewed-by: Ian Romanick --- diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index 52570943996..b9ad6fb8bc8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -56,8 +56,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) inst->dst.file != VGRF || inst->dst.type != inst->src[0].type || inst->src[0].file != VGRF || - inst->src[0].abs || - inst->src[0].negate) + inst->src[0].abs) continue; int src_var = v->live_intervals->var_from_reg(inst->src[0]); @@ -82,6 +81,16 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) scan_inst->src[i].type = inst->dst.type; } } + + if (inst->src[0].negate) { + if (scan_inst->opcode == BRW_OPCODE_MUL) { + scan_inst->src[0].negate = !scan_inst->src[0].negate; + inst->src[0].negate = false; + } else { + break; + } + } + scan_inst->saturate = true; inst->saturate = false; progress = true; @@ -96,7 +105,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) if (scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate || scan_inst->src[0].abs || - scan_inst->src[0].negate) { + scan_inst->src[0].negate || + scan_inst->src[0].abs != inst->src[0].abs || + scan_inst->src[0].negate != inst->src[0].negate) { interfered = true; break; } diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp 
b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 32e8b8f8867..30c14724ff0 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -244,6 +244,129 @@ TEST_F(saturate_propagation_test, neg_mov_sat) EXPECT_TRUE(instruction(block0, 1)->saturate); } +TEST_F(saturate_propagation_test, mul_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 -dst0 + * + * = After = + * 0: mul.sat(8) dst0 src0 -src1 + * 1: mov(8) dst1 dst0 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_TRUE(instruction(block0, 0)->src[0].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); + EXPECT_FALSE(instruction(block0, 1)->src[0].negate); +} + +TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg dst2 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + dst0.negate = true; + set_saturate(true, bld.MOV(dst2, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: 
mov.sat(8) dst1 dst0 + * 2: mov.sat(8) dst2 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_FALSE(instruction(block0, 0)->src[1].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_TRUE(instruction(block0, 1)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode); + EXPECT_TRUE(instruction(block0, 2)->src[0].negate); + EXPECT_TRUE(instruction(block0, 2)->saturate); +} + +TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg dst2 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + set_saturate(true, bld.MOV(dst2, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 -dst0 + * 2: mov.sat(8) dst2 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_FALSE(instruction(block0, 0)->src[1].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_TRUE(instruction(block0, 1)->src[0].negate); + EXPECT_TRUE(instruction(block0, 1)->saturate); + 
EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode); + EXPECT_TRUE(instruction(block0, 2)->src[0].negate); + EXPECT_TRUE(instruction(block0, 2)->saturate); +} + TEST_F(saturate_propagation_test, abs_mov_sat) { const fs_builder &bld = v->bld;