+2017-06-21  Andrew Pinski  <apinski@cavium.com>
+
+ * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
+ Increment Arith_shift and Arith_shift_reg by 1.
+ * config/aarch64/aarch64-tuning-flags.def (cheap_shift_extend):
+ New tuning flag.
+ * config/aarch64/aarch64.c (thunderx_tunings): Enable
+ AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND.
+ (aarch64_strip_extend): Add new argument and test for it.
+ (aarch64_cheap_mult_shift_p): New function.
+ (aarch64_rtx_mult_cost): Call aarch64_cheap_mult_shift_p and don't
+ add a cost if it is true.
+ Update calls to aarch64_strip_extend.
+ (aarch64_rtx_costs): Update calls to aarch64_strip_extend.
+
2017-06-21  Andrew Pinski  <apinski@cavium.com>

 * config/aarch64/aarch64-cores.def (thunderxt88p1): Use thunderxt88
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
&thunderx_prefetch_tune
};
/* Helper function for rtx cost calculation. Strip an extend
expression from X. Returns the inner operand if successful, or the
original expression on failure. We deal with a number of possible
- canonicalization variations here. */
+ canonicalization variations here. If STRIP_SHIFT is true, then
+ we can strip off a shift also. */
static rtx
-aarch64_strip_extend (rtx x)
+aarch64_strip_extend (rtx x, bool strip_shift)
{
rtx op = x;
/* Now handle extended register, as this may also have an optional
left shift by 1..4. */
- if (GET_CODE (op) == ASHIFT
+ if (strip_shift
+ && GET_CODE (op) == ASHIFT
&& CONST_INT_P (XEXP (op, 1))
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
op = XEXP (op, 0);
return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}
+
+/* Return true iff X is a cheap shift without a sign extend. */
+
+static bool
+aarch64_cheap_mult_shift_p (rtx x)
+{
+ rtx op0, op1;
+
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
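+ /* Only treat shifts as cheap when the tuning flag asks for it.  */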
+ if (!(aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
+ return false;
+
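+ /* A shift or multiply of a sign-extended operand is never considered cheap.  */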
+ if (GET_CODE (op0) == SIGN_EXTEND)
+ return false;
+
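+ /* An explicit left shift by a small constant (at most 4) is cheap.  */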
+ if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
+ && UINTVAL (op1) <= 4)
+ return true;
+
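+ /* Otherwise, only a multiply by a power of two equivalent to such a shift (2, 4, 8 or 16) is cheap.  */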
+ if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
+ return false;
+
+ HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
+
+ if (l2 > 0 && l2 <= 4)
+ return true;
+
+ return false;
+}
+
/* Helper function for rtx cost calculation. Calculate the cost of
a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
Return the calculated cost of the expression, recursing manually in to
{
if (compound_p)
{
- if (REG_P (op1))
+ /* If the shift is considered cheap,
+ then don't add any cost. */
+ if (aarch64_cheap_mult_shift_p (x))
+ ;
+ else if (REG_P (op1))
/* ARITH + shift-by-register. */
cost += extra_cost->alu.arith_shift_reg;
else if (is_extend)
}
/* Strip extends as we will have costed them in the case above. */
if (is_extend)
- op0 = aarch64_strip_extend (op0);
+ op0 = aarch64_strip_extend (op0, true);
cost += rtx_cost (op0, VOIDmode, code, 0, speed);
if (speed)
*cost += extra_cost->alu.extend_arith;
- op1 = aarch64_strip_extend (op1);
+ op1 = aarch64_strip_extend (op1, true);
*cost += rtx_cost (op1, VOIDmode,
(enum rtx_code) GET_CODE (op1), 0, speed);
return true;
}
- rtx new_op1 = aarch64_strip_extend (op1);
+ rtx new_op1 = aarch64_strip_extend (op1, false);
/* Cost this as an FMA-alike operation. */
if ((GET_CODE (new_op1) == MULT
if (speed)
*cost += extra_cost->alu.extend_arith;
- op0 = aarch64_strip_extend (op0);
+ op0 = aarch64_strip_extend (op0, true);
*cost += rtx_cost (op0, VOIDmode,
(enum rtx_code) GET_CODE (op0), 0, speed);
return true;
/* Strip any extend, leave shifts behind as we will
cost them through mult_cost. */
- new_op0 = aarch64_strip_extend (op0);
+ new_op0 = aarch64_strip_extend (op0, false);
if (GET_CODE (new_op0) == MULT
|| aarch64_shift_p (GET_CODE (new_op0)))
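
(Illustrative sketch only; the example below is not part of the patch and the
function name is made up.)  The shape this tuning targets is a constant shift
folded into the arithmetic operand of an add or subtract:

    /* On AArch64 this compiles to a single
         add x0, x0, x1, lsl 3
       instruction.  */
    long
    add_scaled (long a, long b)
    {
      return a + (b << 3);
    }

With AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND enabled for ThunderX,
aarch64_rtx_mult_cost costs the inner shift (or power-of-two multiply) operand
of such a PLUS without the extra alu.arith_shift charge, while the same shape
involving a SIGN_EXTEND keeps the higher cost from the updated
thunderx_extra_costs table.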