+2018-05-23 Luis Machado <luis.machado@linaro.org>
+
+ * config/aarch64/aarch64-protos.h (cpu_prefetch_tune)
+ <minimum_stride>: New const int field.
+ * config/aarch64/aarch64.c (generic_prefetch_tune): Update to include
+ minimum_stride field defaulting to -1.
+ (exynosm1_prefetch_tune): Likewise.
+ (thunderxt88_prefetch_tune): Likewise.
+ (thunderx_prefetch_tune): Likewise.
+ (thunderx2t99_prefetch_tune): Likewise.
+ (qdf24xx_prefetch_tune) <minimum_stride>: Set to 2048.
+ <default_opt_level>: Set to 3.
+ (aarch64_override_options_internal): Update to set
+ PARAM_PREFETCH_MINIMUM_STRIDE.
+ * doc/invoke.texi (prefetch-minimum-stride): Document new option.
+ * params.def (PARAM_PREFETCH_MINIMUM_STRIDE): New.
+ * params.h (PARAM_PREFETCH_MINIMUM_STRIDE): Define.
+ * tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Return false if
+ stride is constant and is below the minimum stride threshold.
+
2018-05-23 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/arm-cpus.in (mode26): Delete.
const int l1_cache_size;
const int l1_cache_line_size;
const int l2_cache_size;
+ /* The minimum constant stride, in bytes, at or above which we should
+ issue prefetch hints. */
+ const int minimum_stride;
const int default_opt_level;
};
-1, /* l1_cache_size */
-1, /* l1_cache_line_size */
-1, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
32, /* l1_cache_size */
64, /* l1_cache_line_size */
512, /* l2_cache_size */
- -1 /* default_opt_level */
+ 2048, /* minimum_stride */
+ 3 /* default_opt_level */
};
static const cpu_prefetch_tune thunderxt88_prefetch_tune =
32, /* l1_cache_size */
128, /* l1_cache_line_size */
16*1024, /* l2_cache_size */
+ -1, /* minimum_stride */
3 /* default_opt_level */
};
32, /* l1_cache_size */
128, /* l1_cache_line_size */
-1, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
32, /* l1_cache_size */
64, /* l1_cache_line_size */
256, /* l2_cache_size */
+ -1, /* minimum_stride */
-1 /* default_opt_level */
};
aarch64_tune_params.prefetch->l2_cache_size,
opts->x_param_values,
global_options_set.x_param_values);
+ if (aarch64_tune_params.prefetch->minimum_stride >= 0)
+ maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
+ aarch64_tune_params.prefetch->minimum_stride,
+ opts->x_param_values,
+ global_options_set.x_param_values);
/* Use the alternative scheduling-pressure algorithm by default. */
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
@item l2-cache-size
The size of L2 cache, in kilobytes.
+@item prefetch-minimum-stride
+Minimum constant stride, in bytes, at which to start issuing prefetch hints.
+If the stride is less than this threshold, prefetch hints will not be issued.
+
+This setting is useful for processors that have hardware prefetchers, in
+which case there may be conflicts between the hardware prefetchers and
+the software prefetchers. If the hardware prefetchers have a maximum
+stride they can handle, it should be used here to improve the use of
+software prefetchers.
+
+A value of -1, the default, means we don't have a threshold and therefore
+prefetch hints can be issued for any constant stride.
+
+This setting is only useful for strides that are known and constant.
+
@item loop-interchange-max-num-stmts
The maximum number of stmts in a loop to be interchanged.
"The size of L2 cache.",
512, 0, 0)
+/* The minimum constant stride, in bytes, at or above which we should
+ issue prefetch hints. */
+
+DEFPARAM (PARAM_PREFETCH_MINIMUM_STRIDE,
+ "prefetch-minimum-stride",
+ "The minimum constant stride beyond which we should use prefetch "
+ "hints for.",
+ -1, 0, 0)
+
/* Maximum number of statements in loop nest for loop interchange. */
DEFPARAM (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS,
PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
#define L2_CACHE_SIZE \
PARAM_VALUE (PARAM_L2_CACHE_SIZE)
+#define PREFETCH_MINIMUM_STRIDE \
+ PARAM_VALUE (PARAM_PREFETCH_MINIMUM_STRIDE)
#define USE_CANONICAL_TYPES \
PARAM_VALUE (PARAM_USE_CANONICAL_TYPES)
#define IRA_MAX_LOOPS_NUM \
static bool
should_issue_prefetch_p (struct mem_ref *ref)
{
+ /* Some processors may have a hardware prefetcher that may conflict with
+ prefetch hints for a range of strides. Make sure we don't issue
+ prefetches for such cases if the stride is within this particular
+ range. */
+ if (cst_and_fits_in_hwi (ref->group->step)
+ && abs_hwi (int_cst_value (ref->group->step))
+ < (HOST_WIDE_INT) PREFETCH_MINIMUM_STRIDE)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Step for reference %u:%u (%ld) is less than the mininum "
+ "required stride of %d\n",
+ ref->group->uid, ref->uid, int_cst_value (ref->group->step),
+ PREFETCH_MINIMUM_STRIDE);
+ return false;
+ }
+
/* For now do not issue prefetches for only first few of the
iterations. */
if (ref->prefetch_before != PREFETCH_ALL)