aarch64: Add CPU-specific SVE vector costs struct

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Tue, 1 Dec 2020 14:53:30 +0000 (14:53 +0000)

committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Wed, 9 Dec 2020 14:05:30 +0000 (14:05 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 1 Dec 2020 14:53:30 +0000 (14:53 +0000)
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Wed, 9 Dec 2020 14:05:30 +0000 (14:05 +0000)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index 2aa3f1fddaafae58f0bfb26e5b33fe6a94e85e06..c8479666d9c9507470cbe131a41b49420faf18a0 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -192,6 +192,29 @@ struct cpu_regmove_cost
    const int FP2FP;
  };
  
+struct simd_vec_cost
+{
+  const int int_stmt_cost;             /* Cost of any int vector operation,
+                                          excluding load, store, permute,
+                                          vector-to-scalar and
+                                          scalar-to-vector operation.  */
+  const int fp_stmt_cost;               /* Cost of any fp vector operation,
+                                           excluding load, store, permute,
+                                           vector-to-scalar and
+                                           scalar-to-vector operation.  */
+  const int permute_cost;               /* Cost of permute operation.  */
+  const int vec_to_scalar_cost;                 /* Cost of vec-to-scalar operation.  */
+  const int scalar_to_vec_cost;                 /* Cost of scalar-to-vector
+                                           operation.  */
+  const int align_load_cost;    /* Cost of aligned vector load.  */
+  const int unalign_load_cost;  /* Cost of unaligned vector load.  */
+  const int unalign_store_cost;         /* Cost of unaligned vector store.  */
+  const int store_cost;                 /* Cost of vector store.  */
+};
+
+typedef struct simd_vec_cost advsimd_vec_cost;
+typedef struct simd_vec_cost sve_vec_cost;
+
  /* Cost for vector insn classes.  */
  struct cpu_vector_cost
  {
@@ -201,24 +224,10 @@ struct cpu_vector_cost
                                             excluding load and store.  */
    const int scalar_load_cost;           /* Cost of scalar load.  */
    const int scalar_store_cost;          /* Cost of scalar store.  */
-  const int vec_int_stmt_cost;          /* Cost of any int vector operation,
-                                           excluding load, store, permute,
-                                           vector-to-scalar and
-                                           scalar-to-vector operation.  */
-  const int vec_fp_stmt_cost;           /* Cost of any fp vector operation,
-                                           excluding load, store, permute,
-                                           vector-to-scalar and
-                                           scalar-to-vector operation.  */
-  const int vec_permute_cost;           /* Cost of permute operation.  */
-  const int vec_to_scalar_cost;                 /* Cost of vec-to-scalar operation.  */
-  const int scalar_to_vec_cost;                 /* Cost of scalar-to-vector
-                                           operation.  */
-  const int vec_align_load_cost;        /* Cost of aligned vector load.  */
-  const int vec_unalign_load_cost;      /* Cost of unaligned vector load.  */
-  const int vec_unalign_store_cost;     /* Cost of unaligned vector store.  */
-  const int vec_store_cost;             /* Cost of vector store.  */
    const int cond_taken_branch_cost;     /* Cost of taken branch.  */
    const int cond_not_taken_branch_cost;  /* Cost of not taken branch.  */
+  const advsimd_vec_cost *advsimd;      /* Cost of Advanced SIMD operations.  */
+  const sve_vec_cost *sve;              /* Cost of SVE operations.  */
  };
  
  /* Branch costs.  */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 67ffba02d3e650516d57280566de19ec51a02cdd..b79630194c70bdd2e72b9ad3dab5d85824d2f1be 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -559,6 +559,34 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
    2  /* FP2FP  */
  };
  
+/* Generic costs for Advanced SIMD vector operations.  */
+static const advsimd_vec_cost generic_advsimd_vector_cost =
+{
+  1, /* int_stmt_cost  */
+  1, /* fp_stmt_cost  */
+  2, /* permute_cost  */
+  2, /* vec_to_scalar_cost  */
+  1, /* scalar_to_vec_cost  */
+  1, /* align_load_cost  */
+  1, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
+};
+
+/* Generic costs for SVE vector operations.  */
+static const sve_vec_cost generic_sve_vector_cost =
+{
+  1, /* int_stmt_cost  */
+  1, /* fp_stmt_cost  */
+  2, /* permute_cost  */
+  2, /* vec_to_scalar_cost  */
+  1, /* scalar_to_vec_cost  */
+  1, /* align_load_cost  */
+  1, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
+};
+
  /* Generic costs for vector insn classes.  */
  static const struct cpu_vector_cost generic_vector_cost =
  {
@@ -566,17 +594,23 @@ static const struct cpu_vector_cost generic_vector_cost =
    1, /* scalar_fp_stmt_cost  */
    1, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  1, /* vec_int_stmt_cost  */
-  1, /* vec_fp_stmt_cost  */
-  2, /* vec_permute_cost  */
-  2, /* vec_to_scalar_cost  */
-  1, /* scalar_to_vec_cost  */
-  1, /* vec_align_load_cost  */
-  1, /* vec_unalign_load_cost  */
-  1, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    3, /* cond_taken_branch_cost  */
-  1 /* cond_not_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &generic_advsimd_vector_cost, /* advsimd  */
+  &generic_sve_vector_cost /* sve */
+};
+
+static const advsimd_vec_cost qdf24xx_advsimd_vector_cost =
+{
+  1, /* int_stmt_cost  */
+  3, /* fp_stmt_cost  */
+  2, /* permute_cost  */
+  1, /* vec_to_scalar_cost  */
+  1, /* scalar_to_vec_cost  */
+  1, /* align_load_cost  */
+  1, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
  };
  
  /* QDF24XX costs for vector insn classes.  */
@@ -586,17 +620,24 @@ static const struct cpu_vector_cost qdf24xx_vector_cost =
    1, /* scalar_fp_stmt_cost  */
    1, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  1, /* vec_int_stmt_cost  */
-  3, /* vec_fp_stmt_cost  */
-  2, /* vec_permute_cost  */
-  1, /* vec_to_scalar_cost  */
-  1, /* scalar_to_vec_cost  */
-  1, /* vec_align_load_cost  */
-  1, /* vec_unalign_load_cost  */
-  1, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    3, /* cond_taken_branch_cost  */
-  1 /* cond_not_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &qdf24xx_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
+};
+
+
+static const advsimd_vec_cost thunderx_advsimd_vector_cost =
+{
+  4, /* int_stmt_cost  */
+  1, /* fp_stmt_cost  */
+  4, /* permute_cost  */
+  2, /* vec_to_scalar_cost  */
+  2, /* scalar_to_vec_cost  */
+  3, /* align_load_cost  */
+  5, /* unalign_load_cost  */
+  5, /* unalign_store_cost  */
+  1  /* store_cost  */
  };
  
  /* ThunderX costs for vector insn classes.  */
@@ -606,17 +647,23 @@ static const struct cpu_vector_cost thunderx_vector_cost =
    1, /* scalar_fp_stmt_cost  */
    3, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  4, /* vec_int_stmt_cost  */
-  1, /* vec_fp_stmt_cost  */
-  4, /* vec_permute_cost  */
-  2, /* vec_to_scalar_cost  */
-  2, /* scalar_to_vec_cost  */
-  3, /* vec_align_load_cost  */
-  5, /* vec_unalign_load_cost  */
-  5, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    3, /* cond_taken_branch_cost  */
-  3 /* cond_not_taken_branch_cost  */
+  3, /* cond_not_taken_branch_cost  */
+  &thunderx_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
+};
+
+static const advsimd_vec_cost tsv110_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost  */
+  2, /* fp_stmt_cost  */
+  2, /* permute_cost  */
+  3, /* vec_to_scalar_cost  */
+  2, /* scalar_to_vec_cost  */
+  5, /* align_load_cost  */
+  5, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
  };
  
  static const struct cpu_vector_cost tsv110_vector_cost =
@@ -625,37 +672,49 @@ static const struct cpu_vector_cost tsv110_vector_cost =
    1, /* scalar_fp_stmt_cost  */
    5, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  2, /* vec_int_stmt_cost  */
-  2, /* vec_fp_stmt_cost  */
-  2, /* vec_permute_cost  */
-  3, /* vec_to_scalar_cost  */
-  2, /* scalar_to_vec_cost  */
-  5, /* vec_align_load_cost  */
-  5, /* vec_unalign_load_cost  */
-  1, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    1, /* cond_taken_branch_cost  */
-  1 /* cond_not_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &tsv110_advsimd_vector_cost, /* advsimd  */
+  NULL, /* sve  */
  };
  
-/* Generic costs for vector insn classes.  */
+static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost  */
+  2, /* fp_stmt_cost  */
+  3, /* permute_cost  */
+  8, /* vec_to_scalar_cost  */
+  8, /* scalar_to_vec_cost  */
+  4, /* align_load_cost  */
+  4, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
+};
+
+/* Cortex-A57 costs for vector insn classes.  */
  static const struct cpu_vector_cost cortexa57_vector_cost =
  {
    1, /* scalar_int_stmt_cost  */
    1, /* scalar_fp_stmt_cost  */
    4, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  2, /* vec_int_stmt_cost  */
-  2, /* vec_fp_stmt_cost  */
-  3, /* vec_permute_cost  */
-  8, /* vec_to_scalar_cost  */
-  8, /* scalar_to_vec_cost  */
-  4, /* vec_align_load_cost  */
-  4, /* vec_unalign_load_cost  */
-  1, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    1, /* cond_taken_branch_cost  */
-  1 /* cond_not_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &cortexa57_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
+};
+
+static const advsimd_vec_cost exynosm1_advsimd_vector_cost =
+{
+  3, /* int_stmt_cost  */
+  3, /* fp_stmt_cost  */
+  3, /* permute_cost  */
+  3, /* vec_to_scalar_cost  */
+  3, /* scalar_to_vec_cost  */
+  5, /* align_load_cost  */
+  5, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
  };
  
  static const struct cpu_vector_cost exynosm1_vector_cost =
@@ -664,17 +723,23 @@ static const struct cpu_vector_cost exynosm1_vector_cost =
    1, /* scalar_fp_stmt_cost  */
    5, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  3, /* vec_int_stmt_cost  */
-  3, /* vec_fp_stmt_cost  */
-  3, /* vec_permute_cost  */
-  3, /* vec_to_scalar_cost  */
-  3, /* scalar_to_vec_cost  */
-  5, /* vec_align_load_cost  */
-  5, /* vec_unalign_load_cost  */
-  1, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    1, /* cond_taken_branch_cost  */
-  1 /* cond_not_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &exynosm1_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
+};
+
+static const advsimd_vec_cost xgene1_advsimd_vector_cost =
+{
+  2, /* int_stmt_cost  */
+  2, /* fp_stmt_cost  */
+  2, /* permute_cost  */
+  4, /* vec_to_scalar_cost  */
+  4, /* scalar_to_vec_cost  */
+  10, /* align_load_cost  */
+  10, /* unalign_load_cost  */
+  2, /* unalign_store_cost  */
+  2  /* store_cost  */
  };
  
  /* Generic costs for vector insn classes.  */
@@ -684,17 +749,23 @@ static const struct cpu_vector_cost xgene1_vector_cost =
    1, /* scalar_fp_stmt_cost  */
    5, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  2, /* vec_int_stmt_cost  */
-  2, /* vec_fp_stmt_cost  */
-  2, /* vec_permute_cost  */
-  4, /* vec_to_scalar_cost  */
-  4, /* scalar_to_vec_cost  */
-  10, /* vec_align_load_cost  */
-  10, /* vec_unalign_load_cost  */
-  2, /* vec_unalign_store_cost  */
-  2, /* vec_store_cost  */
    2, /* cond_taken_branch_cost  */
-  1 /* cond_not_taken_branch_cost  */
+  1, /* cond_not_taken_branch_cost  */
+  &xgene1_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
+};
+
+static const advsimd_vec_cost thunderx2t99_advsimd_vector_cost =
+{
+  4, /* int_stmt_cost  */
+  5, /* fp_stmt_cost  */
+  10, /* permute_cost  */
+  6, /* vec_to_scalar_cost  */
+  5, /* scalar_to_vec_cost  */
+  4, /* align_load_cost  */
+  4, /* unalign_load_cost  */
+  1, /* unalign_store_cost  */
+  1  /* store_cost  */
  };
  
  /* Costs for vector insn classes for Vulcan.  */
@@ -704,17 +775,23 @@ static const struct cpu_vector_cost thunderx2t99_vector_cost =
    6, /* scalar_fp_stmt_cost  */
    4, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  4, /* vec_int_stmt_cost  */
-  5, /* vec_fp_stmt_cost  */
-  10, /* vec_permute_cost  */
-  6, /* vec_to_scalar_cost  */
-  5, /* scalar_to_vec_cost  */
-  4, /* vec_align_load_cost  */
-  4, /* vec_unalign_load_cost  */
-  1, /* vec_unalign_store_cost  */
-  1, /* vec_store_cost  */
    2, /* cond_taken_branch_cost  */
-  1  /* cond_not_taken_branch_cost  */
+  1,  /* cond_not_taken_branch_cost  */
+  &thunderx2t99_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
+};
+
+static const advsimd_vec_cost thunderx3t110_advsimd_vector_cost =
+{
+  5, /* int_stmt_cost  */
+  5, /* fp_stmt_cost  */
+  10, /* permute_cost  */
+  5, /* vec_to_scalar_cost  */
+  5, /* scalar_to_vec_cost  */
+  4, /* align_load_cost  */
+  4, /* unalign_load_cost  */
+  4, /* unalign_store_cost  */
+  4  /* store_cost  */
  };
  
  static const struct cpu_vector_cost thunderx3t110_vector_cost =
@@ -723,17 +800,10 @@ static const struct cpu_vector_cost thunderx3t110_vector_cost =
    5, /* scalar_fp_stmt_cost  */
    4, /* scalar_load_cost  */
    1, /* scalar_store_cost  */
-  5, /* vec_int_stmt_cost  */
-  5, /* vec_fp_stmt_cost  */
-  10, /* vec_permute_cost  */
-  5, /* vec_to_scalar_cost  */
-  5, /* scalar_to_vec_cost  */
-  4, /* vec_align_load_cost  */
-  4, /* vec_unalign_load_cost  */
-  4, /* vec_unalign_store_cost  */
-  4, /* vec_store_cost  */
    2, /* cond_taken_branch_cost  */
-  1  /* cond_not_taken_branch_cost  */
+  1,  /* cond_not_taken_branch_cost  */
+  &thunderx3t110_advsimd_vector_cost, /* advsimd  */
+  NULL /* sve  */
  };
  
  
@@ -13712,6 +13782,13 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
    if (vectype != NULL)
      fp = FLOAT_TYPE_P (vectype);
  
+  const simd_vec_cost *simd_costs;
+  if (vectype != NULL && aarch64_sve_mode_p (TYPE_MODE (vectype))
+      && costs->sve != NULL)
+    simd_costs = costs->sve;
+  else
+    simd_costs = costs->advsimd;
+
    switch (type_of_cost)
      {
        case scalar_stmt:
@@ -13724,27 +13801,28 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
         return costs->scalar_store_cost;
  
        case vector_stmt:
-       return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
+       return fp ? simd_costs->fp_stmt_cost
+                 : simd_costs->int_stmt_cost;
  
        case vector_load:
-       return costs->vec_align_load_cost;
+       return simd_costs->align_load_cost;
  
        case vector_store:
-       return costs->vec_store_cost;
+       return simd_costs->store_cost;
  
        case vec_to_scalar:
-       return costs->vec_to_scalar_cost;
+       return simd_costs->vec_to_scalar_cost;
  
        case scalar_to_vec:
-       return costs->scalar_to_vec_cost;
+       return simd_costs->scalar_to_vec_cost;
  
        case unaligned_load:
        case vector_gather_load:
-       return costs->vec_unalign_load_cost;
+       return simd_costs->unalign_load_cost;
  
        case unaligned_store:
        case vector_scatter_store:
-       return costs->vec_unalign_store_cost;
+       return simd_costs->unalign_store_cost;
  
        case cond_branch_taken:
         return costs->cond_taken_branch_cost;
@@ -13753,10 +13831,11 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
         return costs->cond_not_taken_branch_cost;
  
        case vec_perm:
-       return costs->vec_permute_cost;
+       return simd_costs->permute_cost;
  
        case vec_promote_demote:
-       return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
+       return fp ? simd_costs->fp_stmt_cost
+                 : simd_costs->int_stmt_cost;
  
        case vec_construct:
         elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Tue, 1 Dec 2020 14:53:30 +0000 (14:53 +0000)
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Wed, 9 Dec 2020 14:05:30 +0000 (14:05 +0000)
gcc/config/aarch64/aarch64-protos.h		patch | blob | history
gcc/config/aarch64/aarch64.c		patch | blob | history