const int unpredictable; /* Unpredictable branch or optimizing for speed. */
};
+/* Control approximate alternatives to certain FP operators.
+ AARCH64_APPROX_MODE (MODE) yields a one-bit mask identifying MODE:
+ modes in MIN_MODE_FLOAT..MAX_MODE_FLOAT take the low bits, modes in
+ MIN_MODE_VECTOR_FLOAT..MAX_MODE_VECTOR_FLOAT take the bits right above
+ them, and any other mode yields 0. The masks are unsigned so that
+ they match the unsigned mask fields they are tested against and so
+ that selecting the top bit never shifts into a sign bit. */
+#define AARCH64_APPROX_MODE(MODE) \
+ ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
+ ? (1U << ((MODE) - MIN_MODE_FLOAT)) \
+ : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
+ ? (1U << ((MODE) - MIN_MODE_VECTOR_FLOAT \
+ + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
+ : (0U))
+#define AARCH64_APPROX_NONE (0U)
+#define AARCH64_APPROX_ALL (~0U)
+
+/* Allowed modes for approximations. Each field is a bit mask that is
+ tested against AARCH64_APPROX_MODE (mode); AARCH64_APPROX_NONE and
+ AARCH64_APPROX_ALL are the empty and the full mask. */
+struct cpu_approx_modes
+{
+ const unsigned int recip_sqrt; /* Reciprocal square root. */
+};
+
struct tune_params
{
const struct cpu_cost_table *insn_extra_cost;
const struct cpu_regmove_cost *regmove_cost;
const struct cpu_vector_cost *vec_costs;
const struct cpu_branch_cost *branch_costs;
+ const struct cpu_approx_modes *approx_modes;
int memmov_cost;
int issue_rate;
unsigned int fusible_ops;
3 /* Unpredictable. */
};
+/* Generic approximation modes: the reciprocal square root mask is
+ empty, so it selects no mode. */
+static const cpu_approx_modes generic_approx_modes =
+{
+ AARCH64_APPROX_NONE /* recip_sqrt */
+};
+
+/* Approximation modes for Exynos M1: the reciprocal square root mask
+ has every bit set, so it selects all modes. */
+static const cpu_approx_modes exynosm1_approx_modes =
+{
+ AARCH64_APPROX_ALL /* recip_sqrt */
+};
+
+/* Approximation modes for X-Gene 1: the reciprocal square root mask
+ has every bit set, so it selects all modes. */
+static const cpu_approx_modes xgene1_approx_modes =
+{
+ AARCH64_APPROX_ALL /* recip_sqrt */
+};
+
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
&generic_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
+ &generic_approx_modes,
4, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
&cortexa53_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
+ &generic_approx_modes,
4, /* memmov_cost */
1, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
&cortexa53_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
+ &generic_approx_modes,
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
&cortexa57_branch_cost,
+ &generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
&generic_branch_cost,
+ &generic_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
&exynosm1_regmove_cost,
&exynosm1_vector_cost,
&generic_branch_cost,
+ &exynosm1_approx_modes,
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
48, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
static const struct tune_params thunderx_tunings =
&thunderx_regmove_cost,
&generic_vector_cost,
&generic_branch_cost,
+ &generic_approx_modes,
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
&xgene1_regmove_cost,
&xgene1_vector_cost,
&generic_branch_cost,
+ &xgene1_approx_modes,
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
/* Support for fine-grained override of the tuning structures. */
to optimize 1.0/sqrt. */
static bool
-use_rsqrt_p (void)
+use_rsqrt_p (machine_mode mode)
{
return (!flag_trapping_math
&& flag_unsafe_math_optimizations
- && ((aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
+ && ((aarch64_tune_params.approx_modes->recip_sqrt
+ & AARCH64_APPROX_MODE (mode)) /* MODE's bit is set in the tuning mask. */
|| flag_mrecip_low_precision_sqrt));
}
static tree
aarch64_builtin_reciprocal (tree fndecl)
{
- if (!use_rsqrt_p ())
+ machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl)); /* NOTE(review): mode
+ of TREE_TYPE (fndecl) as written; confirm it is the FP mode intended. */
+
+ if (!use_rsqrt_p (mode))
return NULL_TREE;
return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
}
/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
static bool
-aarch64_optab_supported_p (int op, machine_mode, machine_mode,
+aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
optimization_type opt_type)
{
switch (op)
{
case rsqrt_optab:
- return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
+ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
default:
return true;