scan_inst->flags_written() != flags_written)
goto not_match;
- /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
+ /* From the Kaby Lake PRM Vol. 7 "Assigning Conditional Flags":
*
* * Note that the [post condition signal] bits generated at
* the output of a compute are before the .sat.
*
- * So we don't have to bail if scan_inst has saturate.
+ * Paragraph about post_zero does not mention saturation, but
+ * testing it on actual GPUs shows that conditional modifiers
+ * are applied after saturation.
+ *
+ * * post_zero bit: This bit reflects whether the final
+ * result is zero after all the clamping, normalizing,
+ * or format conversion logic.
+ *
+ * For signed types we don't care about saturation: it won't
+ * change the result of conditional modifier.
+ *
+ * For floating and unsigned types there two special cases,
+ * when we can remove inst even if scan_inst is saturated: G
+ * and LE. Since conditional modifiers are just comparations
+ * against zero, saturating positive values to the upper
+ * limit never changes the result of comparation.
+ *
+ * For negative values:
+ * (sat(x) > 0) == (x > 0) --- false
+ * (sat(x) <= 0) == (x <= 0) --- true
*/
- /* Otherwise, try propagating the conditional. */
const enum brw_conditional_mod cond =
negate ? brw_swap_cmod(inst->conditional_mod)
: inst->conditional_mod;
+ if (scan_inst->saturate &&
+ (brw_reg_type_is_floating_point(scan_inst->dst.type) ||
+ type_is_unsigned_int(scan_inst->dst.type)) &&
+ (cond != BRW_CONDITIONAL_G &&
+ cond != BRW_CONDITIONAL_LE))
+ goto not_match;
+
+ /* Otherwise, try propagating the conditional. */
if (scan_inst->can_do_cmod() &&
((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
scan_inst->conditional_mod == cond)) {
EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
}
+
+TEST_F(cmod_propagation_test, cmp_to_add_float_e)
+{
+ const fs_builder &bld = v->bld;
+ fs_reg dest = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg neg10(brw_imm_f(-10.0f));
+ fs_reg pos10(brw_imm_f(10.0f));
+
+ bld.ADD(dest, src0, neg10)->saturate = true;
+ bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_EQ);
+
+ /* = Before =
+ * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f
+ * 1: cmp.z.f0.0(8) null:F, vgrf1:F, 10f
+ *
+ * = After =
+ * (no changes)
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_FALSE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(1, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
+ EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_to_add_float_g)
+{
+ const fs_builder &bld = v->bld;
+ fs_reg dest = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg neg10(brw_imm_f(-10.0f));
+ fs_reg pos10(brw_imm_f(10.0f));
+
+ bld.ADD(dest, src0, neg10)->saturate = true;
+ bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_G);
+
+ /* = Before =
+ * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f
+ * 1: cmp.g.f0.0(8) null:F, vgrf1:F, 10f
+ *
+ * = After =
+ * 0: add.sat.g.f0.0(8) vgrf0:F, vgrf1:F, -10f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_G, instruction(block0, 0)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, cmp_to_add_float_le)
+{
+ const fs_builder &bld = v->bld;
+ fs_reg dest = v->vgrf(glsl_type::float_type);
+ fs_reg src0 = v->vgrf(glsl_type::float_type);
+ fs_reg neg10(brw_imm_f(-10.0f));
+ fs_reg pos10(brw_imm_f(10.0f));
+
+ bld.ADD(dest, src0, neg10)->saturate = true;
+ bld.CMP(bld.null_reg_f(), src0, pos10, BRW_CONDITIONAL_LE);
+
+ /* = Before =
+ * 0: add.sat(8) vgrf0:F, vgrf1:F, -10f
+ * 1: cmp.le.f0.0(8) null:F, vgrf1:F, 10f
+ *
+ * = After =
+ * 0: add.sat.le.f0.0(8) vgrf0:F, vgrf1:F, -10f
+ */
+
+ v->calculate_cfg();
+ bblock_t *block0 = v->cfg->blocks[0];
+
+ EXPECT_TRUE(cmod_propagation(v));
+ EXPECT_EQ(0, block0->start_ip);
+ EXPECT_EQ(0, block0->end_ip);
+ EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+ EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod);
+}