+2017-04-04 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/79390
+ * target.h (struct noce_if_info): Declare.
+ * targhooks.h (default_noce_conversion_profitable_p): Declare.
+ * target.def (noce_conversion_profitable_p): New target hook.
+ * ifcvt.h (struct noce_if_info): New type, moved from ...
+ * ifcvt.c (struct noce_if_info): ... here.
+ (noce_conversion_profitable_p): Renamed to ...
+ (default_noce_conversion_profitable_p): ... this. No longer
+ static nor inline.
+ (noce_try_store_flag_constants, noce_try_addcc,
+ noce_try_store_flag_mask, noce_try_cmove, noce_try_cmove_arith,
+ noce_convert_multiple_sets): Use targetm.noce_conversion_profitable_p
+ instead of noce_conversion_profitable_p.
+ * config/i386/i386.c: Include ifcvt.h.
+ (ix86_option_override_internal): Don't override
+ PARAM_MAX_RTL_IF_CONVERSION_INSNS default.
+ (ix86_noce_conversion_profitable_p): New function.
+ (TARGET_NOCE_CONVERSION_PROFITABLE_P): Redefine.
+ * config/i386/x86-tune.def (X86_TUNE_ONE_IF_CONV_INSN): Adjust comment.
+ * doc/tm.texi.in (TARGET_NOCE_CONVERSION_PROFITABLE_P): Add.
+ * doc/tm.texi: Regenerated.
+
2017-04-04 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* doc/extend.texi (PowerPC AltiVec Built-in Functions): Grammar
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
+#include "ifcvt.h"
/* This file should be included last. */
#include "target-def.h"
opts->x_param_values,
opts_set->x_param_values);
- /* Restrict number of if-converted SET insns to 1. */
- if (TARGET_ONE_IF_CONV_INSN)
- maybe_set_param_value (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
- 1,
- opts->x_param_values,
- opts_set->x_param_values);
-
/* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
if (opts->x_flag_prefetch_loop_arrays < 0
&& HAVE_prefetch
return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
}
+/* Return true if SEQ is a good candidate as a replacement for the
+ if-convertible sequence described in IF_INFO. */
+
+static bool
+ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+{
+ if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
+ {
+ int cmov_cnt = 0;
+ /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
+ Maybe we should allow even more conditional moves as long as they
+ are used far enough not to stall the CPU, or also consider
+ IF_INFO->TEST_BB succ edge probabilities. */
+ for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
+ {
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+ if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+ continue;
+ rtx src = SET_SRC (set);
+ enum machine_mode mode = GET_MODE (src);
+ if (GET_MODE_CLASS (mode) != MODE_INT
+ && GET_MODE_CLASS (mode) != MODE_FLOAT)
+ continue;
+ if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
+ || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
+ continue;
+ /* insn is CMOV or FCMOV. */
+ if (++cmov_cnt > 1)
+ return false;
+ }
+ }
+ return default_noce_conversion_profitable_p (seq, if_info);
+}
/* Implement targetm.vectorize.init_cost. */
#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
+
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
the unroll factor so that the unrolled loop fits the loop buffer. */
DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
-/* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of set insns to be
- if-converted to one. */
+/* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of cmov insns in
+ if-converted sequence to one. */
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
and uses a multiple of @code{BRANCH_COST} otherwise.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_NOCE_CONVERSION_PROFITABLE_P (rtx_insn *@var{seq}, struct noce_if_info *@var{if_info})
+This hook returns true if the instruction sequence @code{seq} is a good
+candidate as a replacement for the if-convertible sequence described in
+@code{if_info}.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P (void)
This predicate controls the use of the eager delay slot filler to disallow
speculatively executed instructions being placed in delay slots. Targets
@hook TARGET_MAX_NOCE_IFCVT_SEQ_COST
+@hook TARGET_NOCE_CONVERSION_PROFITABLE_P
+
@hook TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P
@node Scheduling
return FALSE;
}
\f
-/* Used by noce_process_if_block to communicate with its subroutines.
-
- The subroutines know that A and B may be evaluated freely. They
- know that X is a register. They should insert new instructions
- before cond_earliest. */
-
-struct noce_if_info
-{
- /* The basic blocks that make up the IF-THEN-{ELSE-,}JOIN block. */
- basic_block test_bb, then_bb, else_bb, join_bb;
-
- /* The jump that ends TEST_BB. */
- rtx_insn *jump;
-
- /* The jump condition. */
- rtx cond;
-
- /* Reversed jump condition. */
- rtx rev_cond;
-
- /* New insns should be inserted before this one. */
- rtx_insn *cond_earliest;
-
- /* Insns in the THEN and ELSE block. There is always just this
- one insns in those blocks. The insns are single_set insns.
- If there was no ELSE block, INSN_B is the last insn before
- COND_EARLIEST, or NULL_RTX. In the former case, the insn
- operands are still valid, as if INSN_B was moved down below
- the jump. */
- rtx_insn *insn_a, *insn_b;
-
- /* The SET_SRC of INSN_A and INSN_B. */
- rtx a, b;
-
- /* The SET_DEST of INSN_A. */
- rtx x;
-
- /* The original set destination that the THEN and ELSE basic blocks finally
- write their result to. */
- rtx orig_x;
- /* True if this if block is not canonical. In the canonical form of
- if blocks, the THEN_BB is the block reached via the fallthru edge
- from TEST_BB. For the noce transformations, we allow the symmetric
- form as well. */
- bool then_else_reversed;
-
- /* True if the contents of then_bb and else_bb are a
- simple single set instruction. */
- bool then_simple;
- bool else_simple;
-
- /* True if we're optimisizing the control block for speed, false if
- we're optimizing for size. */
- bool speed_p;
-
- /* An estimate of the original costs. When optimizing for size, this is the
- combined cost of COND, JUMP and the costs for THEN_BB and ELSE_BB.
- When optimizing for speed, we use the costs of COND plus the minimum of
- the costs for THEN_BB and ELSE_BB, as computed in the next field. */
- unsigned int original_cost;
-
- /* Maximum permissible cost for the unconditional sequence we should
- generate to replace this branch. */
- unsigned int max_seq_cost;
-
- /* The name of the noce transform that succeeded in if-converting
- this structure. Used for debugging. */
- const char *transform_name;
-};
-
static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int);
static int noce_try_move (struct noce_if_info *);
static int noce_try_ifelse_collapse (struct noce_if_info *);
return reversed_comparison_code (if_info->cond, if_info->jump);
}
-/* Return TRUE if SEQ is a good candidate as a replacement for the
- if-convertible sequence described in IF_INFO. */
+/* Return true if SEQ is a good candidate as a replacement for the
+ if-convertible sequence described in IF_INFO.
+ This is the default implementation that targets can override
+ through a target hook. */
-inline static bool
-noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+bool
+default_noce_conversion_profitable_p (rtx_insn *seq,
+ struct noce_if_info *if_info)
{
bool speed_p = if_info->speed_p;
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
noce_emit_move_insn (if_info->x, target);
seq = end_ifcvt_sequence (if_info);
- if (!seq || !noce_conversion_profitable_p (seq, if_info))
+ if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info))
return FALSE;
emit_insn_before_setloc (seq, if_info->jump,
noce_emit_move_insn (x, target);
ifcvt_seq = end_ifcvt_sequence (if_info);
- if (!ifcvt_seq || !noce_conversion_profitable_p (ifcvt_seq, if_info))
+ if (!ifcvt_seq || !targetm.noce_conversion_profitable_p (ifcvt_seq, if_info))
return FALSE;
emit_insn_before_setloc (ifcvt_seq, if_info->jump,
/* Actually emit the sequence if it isn't too expensive. */
rtx_insn *seq = get_insns ();
- if (!noce_conversion_profitable_p (seq, if_info))
+ if (!targetm.noce_conversion_profitable_p (seq, if_info))
{
end_sequence ();
return FALSE;
int pass; /* Pass number. */
};
+/* Used by noce_process_if_block to communicate with its subroutines.
+
+ The subroutines know that A and B may be evaluated freely. They
+ know that X is a register. They should insert new instructions
+ before cond_earliest. */
+
+struct noce_if_info
+{
+ /* The basic blocks that make up the IF-THEN-{ELSE-,}JOIN block. */
+ basic_block test_bb, then_bb, else_bb, join_bb;
+
+ /* The jump that ends TEST_BB. */
+ rtx_insn *jump;
+
+ /* The jump condition. */
+ rtx cond;
+
+ /* Reversed jump condition. */
+ rtx rev_cond;
+
+ /* New insns should be inserted before this one. */
+ rtx_insn *cond_earliest;
+
+ /* Insns in the THEN and ELSE block. There is always just this
+ one insns in those blocks. The insns are single_set insns.
+ If there was no ELSE block, INSN_B is the last insn before
+ COND_EARLIEST, or NULL_RTX. In the former case, the insn
+ operands are still valid, as if INSN_B was moved down below
+ the jump. */
+ rtx_insn *insn_a, *insn_b;
+
+ /* The SET_SRC of INSN_A and INSN_B. */
+ rtx a, b;
+
+ /* The SET_DEST of INSN_A. */
+ rtx x;
+
+ /* The original set destination that the THEN and ELSE basic blocks finally
+ write their result to. */
+ rtx orig_x;
+ /* True if this if block is not canonical. In the canonical form of
+ if blocks, the THEN_BB is the block reached via the fallthru edge
+ from TEST_BB. For the noce transformations, we allow the symmetric
+ form as well. */
+ bool then_else_reversed;
+
+ /* True if the contents of then_bb and else_bb are a
+ simple single set instruction. */
+ bool then_simple;
+ bool else_simple;
+
+ /* True if we're optimisizing the control block for speed, false if
+ we're optimizing for size. */
+ bool speed_p;
+
+ /* An estimate of the original costs. When optimizing for size, this is the
+ combined cost of COND, JUMP and the costs for THEN_BB and ELSE_BB.
+ When optimizing for speed, we use the costs of COND plus the minimum of
+ the costs for THEN_BB and ELSE_BB, as computed in the next field. */
+ unsigned int original_cost;
+
+ /* Maximum permissible cost for the unconditional sequence we should
+ generate to replace this branch. */
+ unsigned int max_seq_cost;
+
+ /* The name of the noce transform that succeeded in if-converting
+ this structure. Used for debugging. */
+ const char *transform_name;
+};
+
#endif /* GCC_IFCVT_H */
unsigned int, (edge e),
default_max_noce_ifcvt_seq_cost)
+/* Return true if the given instruction sequence is a good candidate
+ as a replacement for the if-convertible sequence. */
+DEFHOOK
+(noce_conversion_profitable_p,
+ "This hook returns true if the instruction sequence @code{seq} is a good\n\
+candidate as a replacement for the if-convertible sequence described in\n\
+@code{if_info}.",
+bool, (rtx_insn *seq, struct noce_if_info *if_info),
+default_noce_conversion_profitable_p)
+
/* Permit speculative instructions in delay slots during delayed-branch
scheduling. */
DEFHOOK
/* This is defined in cfgloop.h . */
struct loop;
+/* This is defined in ifcvt.h. */
+struct noce_if_info;
+
/* This is defined in tree-ssa-alias.h. */
struct ao_ref;
extern bool default_optab_supported_p (int, machine_mode, machine_mode,
optimization_type);
extern unsigned int default_max_noce_ifcvt_seq_cost (edge);
+extern bool default_noce_conversion_profitable_p (rtx_insn *,
+ struct noce_if_info *);
extern unsigned int default_min_arithmetic_precision (void);
extern enum flt_eval_method
+2017-04-04 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/79390
+ * gcc.target/i386/pr79390.c: New test.
+ * gcc.dg/ifcvt-4.c: Use -mtune-ctrl=^one_if_conv_insn for i?86/x86_64.
+
2017-04-04 Volker Reichelt <v.reichelt@netcologne.de>
PR c++/80296
/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3 --param max-rtl-if-conversion-unpredictable-cost=100" } */
/* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */
/* { dg-additional-options "-march=z196" { target { s390x-*-* } } } */
+/* { dg-additional-options "-mtune-ctrl=^one_if_conv_insn" { target { i?86-*-* x86_64-*-* } } } */
/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* hppa*64*-*-* s390-*-* visium-*-*" riscv*-*-* } } */
/* { dg-skip-if "" { "s390x-*-*" } { "-m31" } } */
--- /dev/null
+/* PR tree-optimization/79390 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -march=haswell -mtune=haswell -mfpmath=sse" } */
+/* Make sure we emit a conditional move in this loop. */
+
+extern double A[32];
+
+int
+foo (void)
+{
+ double t = A[0];
+ int jp = 0;
+ int i;
+
+ for (i = 0; i < 32; i++)
+ {
+ double ab = A[i];
+ if (ab > t)
+ {
+ jp = i;
+ t = ab;
+ }
+ }
+
+ return jp;
+}
+
+/* { dg-final { scan-assembler "\[ \\t\]cmov\[a-z]+\[ \\t\]" } } */