--- /dev/null
+/* { dg-require-effective-target scalar_all_fma } */
+/* { dg-additional-options "-fdump-tree-optimized -ffp-contract=fast" } */
+
+#include "tree-vect.h"
+
+#define N (VECTOR_BITS * 11 / 64 + 3)
+
+#define DEF(INV) \
+ void __attribute__ ((noipa)) \
+ f_##INV (double *restrict a, double *restrict b, \
+ double *restrict c, double *restrict d) \
+ { \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double mb = (INV & 1 ? -b[i] : b[i]); \
+ double mc = c[i]; \
+ double md = (INV & 2 ? -d[i] : d[i]); \
+ a[i] = b[i] < 10 ? mb * mc + md : 10.0; \
+ } \
+ }
+
+#define TEST(INV) \
+ { \
+ f_##INV (a, b, c, d); \
+ for (int i = 0; i < N; ++i) \
+ { \
+ double mb = (INV & 1 ? -b[i] : b[i]); \
+ double mc = c[i]; \
+ double md = (INV & 2 ? -d[i] : d[i]); \
+ double fma = __builtin_fma (mb, mc, md); \
+ if (a[i] != (i % 17 < 10 ? fma : 10.0)) \
+ __builtin_abort (); \
+ asm volatile ("" ::: "memory"); \
+ } \
+ }
+
+#define FOR_EACH_INV(T) \
+ T (0) T (1) T (2) T (3)
+
+FOR_EACH_INV (DEF)
+
+int
+main (void)
+{
+ double a[N], b[N], c[N], d[N];
+ for (int i = 0; i < N; ++i)
+ {
+ b[i] = i % 17;
+ c[i] = i % 9 + 11;
+ d[i] = i % 13 + 14;
+ asm volatile ("" ::: "memory");
+ }
+ FOR_EACH_INV (TEST)
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 1 "optimized" { target vect_double_cond_arith } } } */
+/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 1 "optimized" { target vect_double_cond_arith } } } */
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed.
+ If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
+ on MUL_COND, otherwise it is unconditional.
If STATE indicates that we are deferring FMA transformation, that means
that we do not produce FMAs for basic blocks which look like:
static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
- fma_deferring_state *state)
+ fma_deferring_state *state, tree mul_cond = NULL_TREE)
{
tree mul_result = gimple_get_lhs (mul_stmt);
tree type = TREE_TYPE (mul_result);
return false;
}
+ if (mul_cond && cond != mul_cond)
+ return false;
+
if (cond)
{
if (cond == result || else_value == result)
}
else if (is_gimple_call (stmt))
{
- tree fndecl = gimple_call_fndecl (stmt);
- if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
+ switch (gimple_call_combined_fn (stmt))
{
- switch (DECL_FUNCTION_CODE (fndecl))
+ CASE_CFN_POW:
+ if (gimple_call_lhs (stmt)
+ && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
+ && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
+ &dconst2)
+ && convert_mult_to_fma (stmt,
+ gimple_call_arg (stmt, 0),
+ gimple_call_arg (stmt, 0),
+ &fma_state))
{
- case BUILT_IN_POWF:
- case BUILT_IN_POW:
- case BUILT_IN_POWL:
- if (gimple_call_lhs (stmt)
- && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
- && real_equal
- (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
- &dconst2)
- && convert_mult_to_fma (stmt,
- gimple_call_arg (stmt, 0),
- gimple_call_arg (stmt, 0),
- &fma_state))
- {
- unlink_stmt_vdef (stmt);
- if (gsi_remove (&gsi, true)
- && gimple_purge_dead_eh_edges (bb))
- *m_cfg_changed_p = true;
- release_defs (stmt);
- continue;
- }
- break;
+ unlink_stmt_vdef (stmt);
+ if (gsi_remove (&gsi, true)
+ && gimple_purge_dead_eh_edges (bb))
+ *m_cfg_changed_p = true;
+ release_defs (stmt);
+ continue;
+ }
+ break;
- default:;
+ case CFN_COND_MUL:
+ if (convert_mult_to_fma (stmt,
+ gimple_call_arg (stmt, 1),
+ gimple_call_arg (stmt, 2),
+ &fma_state,
+ gimple_call_arg (stmt, 0)))
+
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ continue;
}
+ break;
+
+ case CFN_LAST:
+ cancel_fma_deferring (&fma_state);
+ break;
+
+ default:
+ break;
}
- else
- cancel_fma_deferring (&fma_state);
}
gsi_next (&gsi);
}