From 76e4f444a6eb681a60ffae17a10f55631ba9bf69 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov
Date: Tue, 1 Dec 2020 14:53:30 +0000
Subject: [PATCH] aarch64: Add CPU-specific SVE vector costs struct

This patch extends the backend vector cost structures to allow for
separate Advanced SIMD and SVE costs.  The fields in the current
cpu_vector_cost that would vary between the ISAs are moved into a
simd_vec_cost struct and we have two typedefs of it: advsimd_vec_cost
and sve_vec_cost.  If, in the future, SVE needs some extra fields it
could inherit from simd_vec_cost.

The CPU vector cost tables in aarch64.c are updated for the struct
changes.

aarch64_builtin_vectorization_cost is updated to select either the
Advanced SIMD or SVE costs field depending on the mode and field
availability.

No change in codegen is intended with this patch.

gcc/
	* config/aarch64/aarch64-protos.h (cpu_vector_cost): Move simd
	fields to...
	(simd_vec_cost): ... Here.  Define.
	(advsimd_vec_cost): Define.
	(sve_vec_cost): Define.
	* config/aarch64/aarch64.c (generic_advsimd_vector_cost): Define.
	(generic_sve_vector_cost): Likewise.
	(generic_vector_cost): Update.
	(qdf24xx_advsimd_vector_cost): Define.
	(qdf24xx_vector_cost): Update.
	(thunderx_advsimd_vector_cost): Define.
	(thunderx_vector_cost): Update.
	(tsv110_advsimd_vector_cost): Define.
	(tsv110_vector_cost): Update.
	(cortexa57_advsimd_vector_cost): Define.
	(cortexa57_vector_cost): Update.
	(exynosm1_advsimd_vector_cost): Define.
	(exynosm1_vector_cost): Update.
	(xgene1_advsimd_vector_cost): Define.
	(xgene1_vector_cost): Update.
	(thunderx2t99_advsimd_vector_cost): Define.
	(thunderx2t99_vector_cost): Update.
	(thunderx3t110_advsimd_vector_cost): Define.
	(thunderx3t110_vector_cost): Update.
	(aarch64_builtin_vectorization_cost): Handle sve and advsimd
	vector cost fields.
---
 gcc/config/aarch64/aarch64-protos.h |  41 ++--
 gcc/config/aarch64/aarch64.c        | 279 ++++++++++++++++++----------
 2 files changed, 204 insertions(+), 116 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 2aa3f1fddaa..c8479666d9c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -192,6 +192,29 @@ struct cpu_regmove_cost
   const int FP2FP;
 };
 
+struct simd_vec_cost
+{
+  const int int_stmt_cost;	    /* Cost of any int vector operation,
+				       excluding load, store, permute,
+				       vector-to-scalar and
+				       scalar-to-vector operation. */
+  const int fp_stmt_cost;	    /* Cost of any fp vector operation,
+				       excluding load, store, permute,
+				       vector-to-scalar and
+				       scalar-to-vector operation. */
+  const int permute_cost;	    /* Cost of permute operation. */
+  const int vec_to_scalar_cost;	    /* Cost of vec-to-scalar operation. */
+  const int scalar_to_vec_cost;	    /* Cost of scalar-to-vector
+				       operation. */
+  const int align_load_cost;	    /* Cost of aligned vector load. */
+  const int unalign_load_cost;	    /* Cost of unaligned vector load. */
+  const int unalign_store_cost;	    /* Cost of unaligned vector store. */
+  const int store_cost;		    /* Cost of vector store. */
+};
+
+typedef struct simd_vec_cost advsimd_vec_cost;
+typedef struct simd_vec_cost sve_vec_cost;
+
 /* Cost for vector insn classes. */
 struct cpu_vector_cost
 {
@@ -201,24 +224,10 @@ struct cpu_vector_cost
 				       excluding load and store. */
   const int scalar_load_cost;	    /* Cost of scalar load. */
   const int scalar_store_cost;	    /* Cost of scalar store. */
-  const int vec_int_stmt_cost;	    /* Cost of any int vector operation,
-				       excluding load, store, permute,
-				       vector-to-scalar and
-				       scalar-to-vector operation. */
-  const int vec_fp_stmt_cost;	    /* Cost of any fp vector operation,
-				       excluding load, store, permute,
-				       vector-to-scalar and
-				       scalar-to-vector operation. */
-  const int vec_permute_cost;	    /* Cost of permute operation. */
-  const int vec_to_scalar_cost;	    /* Cost of vec-to-scalar operation. */
-  const int scalar_to_vec_cost;	    /* Cost of scalar-to-vector
-				       operation. */
-  const int vec_align_load_cost;    /* Cost of aligned vector load. */
-  const int vec_unalign_load_cost;  /* Cost of unaligned vector load. */
-  const int vec_unalign_store_cost; /* Cost of unaligned vector store. */
-  const int vec_store_cost;	    /* Cost of vector store. */
   const int cond_taken_branch_cost;	 /* Cost of taken branch. */
   const int cond_not_taken_branch_cost; /* Cost of not taken branch. */
+  const advsimd_vec_cost *advsimd;  /* Cost of Advanced SIMD operations. */
+  const sve_vec_cost *sve;	    /* Cost of SVE operations. */
 };
 
 /* Branch costs. */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 67ffba02d3e..b79630194c7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -559,6 +559,34 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
   2 /* FP2FP */
 };
 
+/* Generic costs for Advanced SIMD vector operations. */
+static const advsimd_vec_cost generic_advsimd_vector_cost =
+{
+  1, /* int_stmt_cost */
+  1, /* fp_stmt_cost */
+  2, /* permute_cost */
+  2, /* vec_to_scalar_cost */
+  1, /* scalar_to_vec_cost */
+  1, /* align_load_cost */
+  1, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
+};
+
+/* Generic costs for SVE vector operations. */
+static const sve_vec_cost generic_sve_vector_cost =
+{
+  1, /* int_stmt_cost */
+  1, /* fp_stmt_cost */
+  2, /* permute_cost */
+  2, /* vec_to_scalar_cost */
+  1, /* scalar_to_vec_cost */
+  1, /* align_load_cost */
+  1, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
+};
+
 /* Generic costs for vector insn classes. */
 static const struct cpu_vector_cost generic_vector_cost =
 {
@@ -566,17 +594,23 @@ static const struct cpu_vector_cost generic_vector_cost =
   1, /* scalar_fp_stmt_cost */
   1, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  1, /* vec_int_stmt_cost */
-  1, /* vec_fp_stmt_cost */
-  2, /* vec_permute_cost */
-  2, /* vec_to_scalar_cost */
-  1, /* scalar_to_vec_cost */
-  1, /* vec_align_load_cost */
-  1, /* vec_unalign_load_cost */
-  1, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   3, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &generic_advsimd_vector_cost, /* advsimd */
+  &generic_sve_vector_cost /* sve */
+};
+
+static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
+{
+  1, /* int_stmt_cost */
+  3, /* fp_stmt_cost */
+  2, /* permute_cost */
+  1, /* vec_to_scalar_cost */
+  1, /* scalar_to_vec_cost */
+  1, /* align_load_cost */
+  1, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
 };
 
 /* QDF24XX costs for vector insn classes. */
@@ -586,17 +620,24 @@ static const struct cpu_vector_cost qdf24xx_vector_cost =
   1, /* scalar_fp_stmt_cost */
   1, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  1, /* vec_int_stmt_cost */
-  3, /* vec_fp_stmt_cost */
-  2, /* vec_permute_cost */
-  1, /* vec_to_scalar_cost */
-  1, /* scalar_to_vec_cost */
-  1, /* vec_align_load_cost */
-  1, /* vec_unalign_load_cost */
-  1, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   3, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &qdf24xx_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
+};
+
+
+static const advsimd_vec_cost thunderx_advsimd_vector_cost =
+{
+  4, /* int_stmt_cost */
+  1, /* fp_stmt_cost */
+  4, /* permute_cost */
+  2, /* vec_to_scalar_cost */
+  2, /* scalar_to_vec_cost */
+  3, /* align_load_cost */
+  5, /* unalign_load_cost */
+  5, /* unalign_store_cost */
+  1 /* store_cost */
 };
 
 /* ThunderX costs for vector insn classes. */
@@ -606,17 +647,23 @@ static const struct cpu_vector_cost thunderx_vector_cost =
   1, /* scalar_fp_stmt_cost */
   3, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  4, /* vec_int_stmt_cost */
-  1, /* vec_fp_stmt_cost */
-  4, /* vec_permute_cost */
-  2, /* vec_to_scalar_cost */
-  2, /* scalar_to_vec_cost */
-  3, /* vec_align_load_cost */
-  5, /* vec_unalign_load_cost */
-  5, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   3, /* cond_taken_branch_cost */
-  3 /* cond_not_taken_branch_cost */
+  3, /* cond_not_taken_branch_cost */
+  &thunderx_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
+};
+
+static const advsimd_vec_cost tsv110_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost */
+  2, /* fp_stmt_cost */
+  2, /* permute_cost */
+  3, /* vec_to_scalar_cost */
+  2, /* scalar_to_vec_cost */
+  5, /* align_load_cost */
+  5, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
 };
 
 static const struct cpu_vector_cost tsv110_vector_cost =
@@ -625,37 +672,49 @@
   1, /* scalar_fp_stmt_cost */
   5, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  2, /* vec_int_stmt_cost */
-  2, /* vec_fp_stmt_cost */
-  2, /* vec_permute_cost */
-  3, /* vec_to_scalar_cost */
-  2, /* scalar_to_vec_cost */
-  5, /* vec_align_load_cost */
-  5, /* vec_unalign_load_cost */
-  1, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   1, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &tsv110_advsimd_vector_cost, /* advsimd */
+  NULL, /* sve */
 };
 
-/* Generic costs for vector insn classes. */
+static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost */
+  2, /* fp_stmt_cost */
+  3, /* permute_cost */
+  8, /* vec_to_scalar_cost */
+  8, /* scalar_to_vec_cost */
+  4, /* align_load_cost */
+  4, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
+};
+
+/* Cortex-A57 costs for vector insn classes. */
 static const struct cpu_vector_cost cortexa57_vector_cost =
 {
   1, /* scalar_int_stmt_cost */
   1, /* scalar_fp_stmt_cost */
   4, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  2, /* vec_int_stmt_cost */
-  2, /* vec_fp_stmt_cost */
-  3, /* vec_permute_cost */
-  8, /* vec_to_scalar_cost */
-  8, /* scalar_to_vec_cost */
-  4, /* vec_align_load_cost */
-  4, /* vec_unalign_load_cost */
-  1, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   1, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &cortexa57_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
+};
+
+static const advsimd_vec_cost exynosm1_advsimd_vector_cost =
+{
+  3, /* int_stmt_cost */
+  3, /* fp_stmt_cost */
+  3, /* permute_cost */
+  3, /* vec_to_scalar_cost */
+  3, /* scalar_to_vec_cost */
+  5, /* align_load_cost */
+  5, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
 };
 
 static const struct cpu_vector_cost exynosm1_vector_cost =
@@ -664,17 +723,23 @@
   1, /* scalar_fp_stmt_cost */
   5, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  3, /* vec_int_stmt_cost */
-  3, /* vec_fp_stmt_cost */
-  3, /* vec_permute_cost */
-  3, /* vec_to_scalar_cost */
-  3, /* scalar_to_vec_cost */
-  5, /* vec_align_load_cost */
-  5, /* vec_unalign_load_cost */
-  1, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   1, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &exynosm1_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
+};
+
+static const advsimd_vec_cost xgene1_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost */
+  2, /* fp_stmt_cost */
+  2, /* permute_cost */
+  4, /* vec_to_scalar_cost */
+  4, /* scalar_to_vec_cost */
+  10, /* align_load_cost */
+  10, /* unalign_load_cost */
+  2, /* unalign_store_cost */
+  2 /* store_cost */
 };
 
 /* Generic costs for vector insn classes. */
@@ -684,17 +749,23 @@ static const struct cpu_vector_cost xgene1_vector_cost =
   1, /* scalar_fp_stmt_cost */
   5, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  2, /* vec_int_stmt_cost */
-  2, /* vec_fp_stmt_cost */
-  2, /* vec_permute_cost */
-  4, /* vec_to_scalar_cost */
-  4, /* scalar_to_vec_cost */
-  10, /* vec_align_load_cost */
-  10, /* vec_unalign_load_cost */
-  2, /* vec_unalign_store_cost */
-  2, /* vec_store_cost */
   2, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &xgene1_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
+};
+
+static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost =
+{
+  4, /* int_stmt_cost */
+  5, /* fp_stmt_cost */
+  10, /* permute_cost */
+  6, /* vec_to_scalar_cost */
+  5, /* scalar_to_vec_cost */
+  4, /* align_load_cost */
+  4, /* unalign_load_cost */
+  1, /* unalign_store_cost */
+  1 /* store_cost */
 };
 
 /* Costs for vector insn classes for Vulcan. */
@@ -704,17 +775,23 @@
   6, /* scalar_fp_stmt_cost */
   4, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  4, /* vec_int_stmt_cost */
-  5, /* vec_fp_stmt_cost */
-  10, /* vec_permute_cost */
-  6, /* vec_to_scalar_cost */
-  5, /* scalar_to_vec_cost */
-  4, /* vec_align_load_cost */
-  4, /* vec_unalign_load_cost */
-  1, /* vec_unalign_store_cost */
-  1, /* vec_store_cost */
   2, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &thunderx2t99_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
+};
+
+static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost =
+{
+  5, /* int_stmt_cost */
+  5, /* fp_stmt_cost */
+  10, /* permute_cost */
+  5, /* vec_to_scalar_cost */
+  5, /* scalar_to_vec_cost */
+  4, /* align_load_cost */
+  4, /* unalign_load_cost */
+  4, /* unalign_store_cost */
+  4 /* store_cost */
 };
 
 static const struct cpu_vector_cost thunderx3t110_vector_cost =
@@ -723,17 +800,10 @@
   5, /* scalar_fp_stmt_cost */
   4, /* scalar_load_cost */
   1, /* scalar_store_cost */
-  5, /* vec_int_stmt_cost */
-  5, /* vec_fp_stmt_cost */
-  10, /* vec_permute_cost */
-  5, /* vec_to_scalar_cost */
-  5, /* scalar_to_vec_cost */
-  4, /* vec_align_load_cost */
-  4, /* vec_unalign_load_cost */
-  4, /* vec_unalign_store_cost */
-  4, /* vec_store_cost */
   2, /* cond_taken_branch_cost */
-  1 /* cond_not_taken_branch_cost */
+  1, /* cond_not_taken_branch_cost */
+  &thunderx3t110_advsimd_vector_cost, /* advsimd */
+  NULL /* sve */
 };
 
 
@@ -13712,6 +13782,13 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
   if (vectype != NULL)
     fp = FLOAT_TYPE_P (vectype);
 
+  const simd_vec_cost *simd_costs;
+  if (vectype != NULL && aarch64_sve_mode_p (TYPE_MODE (vectype))
+      && costs->sve != NULL)
+    simd_costs = costs->sve;
+  else
+    simd_costs = costs->advsimd;
+
   switch (type_of_cost)
     {
     case scalar_stmt:
@@ -13724,27 +13801,28 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       return costs->scalar_store_cost;
 
     case vector_stmt:
-      return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
+      return fp ? simd_costs->fp_stmt_cost
+		: simd_costs->int_stmt_cost;
 
     case vector_load:
-      return costs->vec_align_load_cost;
+      return simd_costs->align_load_cost;
 
     case vector_store:
-      return costs->vec_store_cost;
+      return simd_costs->store_cost;
 
     case vec_to_scalar:
-      return costs->vec_to_scalar_cost;
+      return simd_costs->vec_to_scalar_cost;
 
     case scalar_to_vec:
-      return costs->scalar_to_vec_cost;
+      return simd_costs->scalar_to_vec_cost;
 
     case unaligned_load:
     case vector_gather_load:
-      return costs->vec_unalign_load_cost;
+      return simd_costs->unalign_load_cost;
 
     case unaligned_store:
    case vector_scatter_store:
-      return costs->vec_unalign_store_cost;
+      return simd_costs->unalign_store_cost;
 
     case cond_branch_taken:
       return costs->cond_taken_branch_cost;
@@ -13753,10 +13831,11 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
      return costs->cond_not_taken_branch_cost;
 
     case vec_perm:
-      return costs->vec_permute_cost;
+      return simd_costs->permute_cost;
 
     case vec_promote_demote:
-      return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
+      return fp ? simd_costs->fp_stmt_cost
+		: simd_costs->int_stmt_cost;
 
     case vec_construct:
      elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
-- 
2.30.2
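
Editor's note (not part of the patch): the sketch below illustrates how a CPU
tuning could use the new structs to supply a dedicated SVE cost table next to
its Advanced SIMD one.  The "examplecpu" names and all cost values are made up
for illustration only; no existing tuning in this patch defines separate SVE
costs yet (they all leave the sve field as NULL).

/* Illustrative only -- a hypothetical "examplecpu" tuning that charges SVE
   permutes and vec-to-scalar transfers more than the Advanced SIMD
   equivalents, using the structs introduced by this patch.  */

static const advsimd_vec_cost examplecpu_advsimd_vector_cost =
{
  1, /* int_stmt_cost */
  2, /* fp_stmt_cost */
  2, /* permute_cost */
  3, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  4, /* align_load_cost */
  4, /* unalign_load_cost */
  1, /* unalign_store_cost */
  1 /* store_cost */
};

static const sve_vec_cost examplecpu_sve_vector_cost =
{
  1, /* int_stmt_cost */
  2, /* fp_stmt_cost */
  3, /* permute_cost */
  5, /* vec_to_scalar_cost */
  2, /* scalar_to_vec_cost */
  4, /* align_load_cost */
  4, /* unalign_load_cost */
  1, /* unalign_store_cost */
  1 /* store_cost */
};

static const struct cpu_vector_cost examplecpu_vector_cost =
{
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  4, /* scalar_load_cost */
  1, /* scalar_store_cost */
  1, /* cond_taken_branch_cost */
  1, /* cond_not_taken_branch_cost */
  &examplecpu_advsimd_vector_cost, /* advsimd */
  &examplecpu_sve_vector_cost /* sve */
};

With a tuning like this, aarch64_builtin_vectorization_cost selects the SVE
table for SVE modes because costs->sve is non-NULL and falls back to the
Advanced SIMD table otherwise, which is also why every tuning in this patch
that keeps sve as NULL retains its current behaviour.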