From 9d2c6e2eb72324b10160ce1117a8ab600be94bad Mon Sep 17 00:00:00 2001 From: Maxim Kuvyrkov Date: Fri, 16 Jun 2017 09:30:43 +0000 Subject: [PATCH] Add prefetch configuration to aarch64 backend. * config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune): New tune structure. (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size. [Unrelated to main purpose of the patch] Place the pointer field last to enable type checking errors when tune structures are wrongly merged. * config/aarch64/aarch64.c (generic_prefetch_tune,) (exynosm1_prefetch_tune, qdf24xx_prefetch_tune,) (thunderx2t99_prefetch_tune): New tune constants. (tune_params *_tunings): Update all tunings (no functional change). (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES, PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE from tunings structures. Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94 From-SVN: r249240 --- gcc/ChangeLog | 15 ++++ gcc/config/aarch64/aarch64-protos.h | 16 ++++- gcc/config/aarch64/aarch64.c | 102 +++++++++++++++++++++------- 3 files changed, 105 insertions(+), 28 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d6f2d7240d4..a78e7363a69 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2017-06-16 Maxim Kuvyrkov + + * config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune): + New tune structure. + (struct tune_params): Use cpu_prefetch_tune instead of cache_line_size. + [Unrelated to main purpose of the patch] Place the pointer field last + to enable type checking errors when tune structures are wrongly merged. + * config/aarch64/aarch64.c (generic_prefetch_tune,) + (exynosm1_prefetch_tune, qdf24xx_prefetch_tune,) + (thunderx2t99_prefetch_tune): New tune constants. + (tune_params *_tunings): Update all tunings (no functional change). 
+ (aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES, + PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE + from tunings structures. + 2017-06-16 Jakub Jelinek PR sanitizer/81094 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index ac91865b75f..bb06139c4bb 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -203,6 +203,15 @@ struct cpu_approx_modes const unsigned int recip_sqrt; /* Reciprocal square root. */ }; +/* Cache prefetch settings for prefetch-loop-arrays. */ +struct cpu_prefetch_tune +{ + const int num_slots; + const int l1_cache_size; + const int l1_cache_line_size; + const int l2_cache_size; +}; + struct tune_params { const struct cpu_cost_table *insn_extra_cost; @@ -224,9 +233,6 @@ struct tune_params int min_div_recip_mul_df; /* Value for aarch64_case_values_threshold; or 0 for the default. */ unsigned int max_case_values; - /* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default. */ - unsigned int cache_line_size; - /* An enum specifying how to take into account CPU autoprefetch capabilities during instruction scheduling: - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account. @@ -244,6 +250,10 @@ struct tune_params } autoprefetcher_model; unsigned int extra_tuning_flags; + + /* Place prefetch struct pointer at the end to enable type checking + errors when tune_params misses elements (e.g., from erroneous merges). */ + const struct cpu_prefetch_tune *prefetch; }; #define AARCH64_FUSION_PAIR(x, name) \ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 239ba72570f..e3296c0570f 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes = AARCH64_APPROX_ALL /* recip_sqrt */ }; +/* Generic prefetch settings (which disable prefetch). 
*/ +static const cpu_prefetch_tune generic_prefetch_tune = +{ + 0, /* num_slots */ + -1, /* l1_cache_size */ + -1, /* l1_cache_line_size */ + -1 /* l2_cache_size */ +}; + +static const cpu_prefetch_tune exynosm1_prefetch_tune = +{ + 0, /* num_slots */ + -1, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + -1 /* l2_cache_size */ +}; + +static const cpu_prefetch_tune qdf24xx_prefetch_tune = +{ + 0, /* num_slots */ + -1, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + -1 /* l2_cache_size */ +}; + +static const cpu_prefetch_tune thunderx2t99_prefetch_tune = +{ + 0, /* num_slots */ + -1, /* l1_cache_size */ + 64, /* l1_cache_line_size */ + -1 /* l2_cache_size */ +}; + static const struct tune_params generic_tunings = { &cortexa57_extra_costs, @@ -546,9 +579,9 @@ static const struct tune_params generic_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune }; static const struct tune_params cortexa35_tunings = @@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune }; static const struct tune_params cortexa53_tunings = @@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. 
*/ + &generic_prefetch_tune }; static const struct tune_params cortexa57_tunings = @@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */ + &generic_prefetch_tune }; static const struct tune_params cortexa72_tunings = @@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune }; static const struct tune_params cortexa73_tunings = @@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune }; + + static const struct tune_params exynosm1_tunings = { &exynosm1_extra_costs, @@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 48, /* max_case_values. */ - 64, /* cache_line_size. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &exynosm1_prefetch_tune }; static const struct tune_params thunderx_tunings = @@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. 
*/ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */ + &generic_prefetch_tune }; static const struct tune_params xgene1_tunings = @@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 0, /* cache_line_size. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune }; static const struct tune_params qdf24xx_tunings = @@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 64, /* cache_line_size. */ tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &qdf24xx_prefetch_tune }; static const struct tune_params thunderx2t99_tunings = @@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings = 2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_df. */ 0, /* max_case_values. */ - 64, /* cache_line_size. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &thunderx2t99_prefetch_tune }; /* Support for fine-grained override of the tuning structures. */ @@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts) opts->x_param_values, global_options_set.x_param_values); - /* Set the L1 cache line size. */ - if (selected_cpu->tune->cache_line_size != 0) + /* Set up parameters to be used in prefetching algorithm. Do not + override the defaults unless we are tuning for a core we have + researched values for. 
*/ + if (aarch64_tune_params.prefetch->num_slots > 0) + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, + aarch64_tune_params.prefetch->num_slots, + opts->x_param_values, + global_options_set.x_param_values); + if (aarch64_tune_params.prefetch->l1_cache_size >= 0) + maybe_set_param_value (PARAM_L1_CACHE_SIZE, + aarch64_tune_params.prefetch->l1_cache_size, + opts->x_param_values, + global_options_set.x_param_values); + if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0) maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, - selected_cpu->tune->cache_line_size, + aarch64_tune_params.prefetch->l1_cache_line_size, + opts->x_param_values, + global_options_set.x_param_values); + if (aarch64_tune_params.prefetch->l2_cache_size >= 0) + maybe_set_param_value (PARAM_L2_CACHE_SIZE, + aarch64_tune_params.prefetch->l2_cache_size, opts->x_param_values, global_options_set.x_param_values); -- 2.30.2