From: Richard Sandiford Date: Mon, 29 Jul 2019 08:40:21 +0000 (+0000) Subject: Implement more rtx vector folds on variable-length vectors X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4ce6ab6889446984fd7017e2150962eb4550a7ee;p=gcc.git Implement more rtx vector folds on variable-length vectors This patch extends the tree-level folding of variable-length vectors so that it can also be used on rtxes. The first step is to move the tree_vector_builder new_unary/binary_operator routines to the parent vector_builder class (which in turn means adding a new template parameter). The second step is to make simplify-rtx.c use a direct rtx analogue of the VECTOR_CST handling in fold-const.c. 2019-07-29 Richard Sandiford gcc/ * vector-builder.h (vector_builder): Add a shape template parameter. (vector_builder::new_unary_operation): New function, generalizing the old tree_vector_builder function. (vector_builder::new_binary_operation): Likewise. (vector_builder::binary_encoded_nelts): Likewise. * int-vector-builder.h (int_vector_builder): Update template parameters to vector_builder. (int_vector_builder::shape_nelts): New function. * rtx-vector-builder.h (rtx_vector_builder): Update template parameters to vector_builder. (rtx_vector_builder::shape_nelts): New function. (rtx_vector_builder::nelts_of): Likewise. (rtx_vector_builder::npatterns_of): Likewise. (rtx_vector_builder::nelts_per_pattern_of): Likewise. * tree-vector-builder.h (tree_vector_builder): Update template parameters to vector_builder. (tree_vector_builder::shape_nelts): New function. (tree_vector_builder::nelts_of): Likewise. (tree_vector_builder::npatterns_of): Likewise. (tree_vector_builder::nelts_per_pattern_of): Likewise. * tree-vector-builder.c (tree_vector_builder::new_unary_operation) (tree_vector_builder::new_binary_operation): Delete. (tree_vector_builder::binary_encoded_nelts): Likewise. * simplify-rtx.c: Include rtx-vector-builder.h. (distributes_over_addition_p): New function. (simplify_const_unary_operation) (simplify_const_binary_operation): Generalize handling of vector constants to include variable-length vectors. (test_vector_ops_series): Add more tests. From-SVN: r273867 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 24da94a8a50..d80cd319bb6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2019-07-29 Richard Sandiford + + * vector-builder.h (vector_builder): Add a shape template parameter. + (vector_builder::new_unary_operation): New function, generalizing + the old tree_vector_builder function. + (vector_builder::new_binary_operation): Likewise. + (vector_builder::binary_encoded_nelts): Likewise. + * int-vector-builder.h (int_vector_builder): Update template + parameters to vector_builder. + (int_vector_builder::shape_nelts): New function. + * rtx-vector-builder.h (rtx_vector_builder): Update template + parameters to vector_builder. + (rtx_vector_builder::shape_nelts): New function. + (rtx_vector_builder::nelts_of): Likewise. + (rtx_vector_builder::npatterns_of): Likewise. + (rtx_vector_builder::nelts_per_pattern_of): Likewise. + * tree-vector-builder.h (tree_vector_builder): Update template + parameters to vector_builder. + (tree_vector_builder::shape_nelts): New function. + (tree_vector_builder::nelts_of): Likewise. + (tree_vector_builder::npatterns_of): Likewise. + (tree_vector_builder::nelts_per_pattern_of): Likewise. + * tree-vector-builder.c (tree_vector_builder::new_unary_operation) + (tree_vector_builder::new_binary_operation): Delete. + (tree_vector_builder::binary_encoded_nelts): Likewise. + * simplify-rtx.c: Include rtx-vector-builder.h. + (distributes_over_addition_p): New function. + (simplify_const_unary_operation) + (simplify_const_binary_operation): Generalize handling of vector + constants to include variable-length vectors. + (test_vector_ops_series): Add more tests. + 2019-07-28 Jan Hubicka PR lto/91222 diff --git a/gcc/int-vector-builder.h b/gcc/int-vector-builder.h index adf0904c5b0..dc96510217c 100644 --- a/gcc/int-vector-builder.h +++ b/gcc/int-vector-builder.h @@ -26,10 +26,11 @@ along with GCC; see the file COPYING3. If not see encoding as tree and rtx constants. See vector_builder for more details. */ template -class int_vector_builder : public vector_builder > +class int_vector_builder : public vector_builder > { - typedef vector_builder parent; - friend class vector_builder; + typedef vector_builder parent; + friend class vector_builder; public: int_vector_builder () {} @@ -45,6 +46,8 @@ private: T apply_step (T, unsigned int, T) const; bool can_elide_p (T) const { return true; } void note_representative (T *, T) {} + + static poly_uint64 shape_nelts (poly_uint64 x) { return x; } }; /* Create a new builder for a vector with FULL_NELTS elements. diff --git a/gcc/rtx-vector-builder.h b/gcc/rtx-vector-builder.h index d5950e2b882..08b55dd3626 100644 --- a/gcc/rtx-vector-builder.h +++ b/gcc/rtx-vector-builder.h @@ -24,10 +24,11 @@ along with GCC; see the file COPYING3. If not see /* This class is used to build VECTOR_CSTs from a sequence of elements. See vector_builder for more details. */ -class rtx_vector_builder : public vector_builder +class rtx_vector_builder : public vector_builder { - typedef vector_builder parent; - friend class vector_builder; + typedef vector_builder parent; + friend class vector_builder; public: rtx_vector_builder () : m_mode (VOIDmode) {} @@ -48,6 +49,15 @@ private: bool can_elide_p (rtx) const { return true; } void note_representative (rtx *, rtx) {} + static poly_uint64 shape_nelts (machine_mode mode) + { return GET_MODE_NUNITS (mode); } + static poly_uint64 nelts_of (const_rtx x) + { return CONST_VECTOR_NUNITS (x); } + static unsigned int npatterns_of (const_rtx x) + { return CONST_VECTOR_NPATTERNS (x); } + static unsigned int nelts_per_pattern_of (const_rtx x) + { return CONST_VECTOR_NELTS_PER_PATTERN (x); } + rtx find_cached_value (); machine_mode m_mode; diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 4892c49efcd..0a69c0503ab 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see #include "flags.h" #include "selftest.h" #include "selftest-rtl.h" +#include "rtx-vector-builder.h" /* Simplification and canonicalization of RTL. */ @@ -1753,27 +1754,23 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode, if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) { - unsigned int n_elts; - if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts)) - return NULL_RTX; - - machine_mode opmode = GET_MODE (op); - gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts)); - gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts)); + gcc_assert (GET_MODE (op) == op_mode); - rtvec v = rtvec_alloc (n_elts); - unsigned int i; + rtx_vector_builder builder; + if (!builder.new_unary_operation (mode, op, false)) + return 0; - for (i = 0; i < n_elts; i++) + unsigned int count = builder.encoded_nelts (); + for (unsigned int i = 0; i < count; i++) { rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode), CONST_VECTOR_ELT (op, i), - GET_MODE_INNER (opmode)); + GET_MODE_INNER (op_mode)); if (!x || !valid_for_const_vector_p (mode, x)) return 0; - RTVEC_ELT (v, i) = x; + builder.quick_push (x); } - return gen_rtx_CONST_VECTOR (mode, v); + return builder.build (); } /* The order of these tests is critical so that, for example, we don't @@ -4059,6 +4056,27 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, return 0; } +/* Return true if binary operation OP distributes over addition in operand + OPNO, with the other operand being held constant. OPNO counts from 1. */ + +static bool +distributes_over_addition_p (rtx_code op, int opno) +{ + switch (op) + { + case PLUS: + case MINUS: + case MULT: + return true; + + case ASHIFT: + return opno == 1; + + default: + return false; + } +} + rtx simplify_const_binary_operation (enum rtx_code code, machine_mode mode, rtx op0, rtx op1) @@ -4068,26 +4086,45 @@ simplify_const_binary_operation (enum rtx_code code, machine_mode mode, && GET_CODE (op0) == CONST_VECTOR && GET_CODE (op1) == CONST_VECTOR) { - unsigned int n_elts; - if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts)) - return NULL_RTX; - - gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1))); - gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode))); - rtvec v = rtvec_alloc (n_elts); - unsigned int i; + bool step_ok_p; + if (CONST_VECTOR_STEPPED_P (op0) + && CONST_VECTOR_STEPPED_P (op1)) + /* We can operate directly on the encoding if: + + a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1 + implies + (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1) + + Addition and subtraction are the supported operators + for which this is true. */ + step_ok_p = (code == PLUS || code == MINUS); + else if (CONST_VECTOR_STEPPED_P (op0)) + /* We can operate directly on stepped encodings if: + + a3 - a2 == a2 - a1 + implies: + (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c) + + which is true if (x -> x op c) distributes over addition. */ + step_ok_p = distributes_over_addition_p (code, 1); + else + /* Similarly in reverse. */ + step_ok_p = distributes_over_addition_p (code, 2); + rtx_vector_builder builder; + if (!builder.new_binary_operation (mode, op0, op1, step_ok_p)) + return 0; - for (i = 0; i < n_elts; i++) + unsigned int count = builder.encoded_nelts (); + for (unsigned int i = 0; i < count; i++) { rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode), CONST_VECTOR_ELT (op0, i), CONST_VECTOR_ELT (op1, i)); if (!x || !valid_for_const_vector_p (mode, x)) return 0; - RTVEC_ELT (v, i) = x; + builder.quick_push (x); } - - return gen_rtx_CONST_VECTOR (mode, v); + return builder.build (); } if (VECTOR_MODE_P (mode) @@ -7012,6 +7049,58 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) ASSERT_RTX_EQ (series_0_m1, simplify_binary_operation (VEC_SERIES, mode, const0_rtx, constm1_rtx)); + + /* Test NEG on constant vector series. */ + ASSERT_RTX_EQ (series_0_m1, + simplify_unary_operation (NEG, mode, series_0_1, mode)); + ASSERT_RTX_EQ (series_0_1, + simplify_unary_operation (NEG, mode, series_0_m1, mode)); + + /* Test PLUS and MINUS on constant vector series. */ + rtx scalar2 = gen_int_mode (2, inner_mode); + rtx scalar3 = gen_int_mode (3, inner_mode); + rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx); + rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2); + rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3); + ASSERT_RTX_EQ (series_1_1, + simplify_binary_operation (PLUS, mode, series_0_1, + CONST1_RTX (mode))); + ASSERT_RTX_EQ (series_0_m1, + simplify_binary_operation (PLUS, mode, CONST0_RTX (mode), + series_0_m1)); + ASSERT_RTX_EQ (series_1_3, + simplify_binary_operation (PLUS, mode, series_1_1, + series_0_2)); + ASSERT_RTX_EQ (series_0_1, + simplify_binary_operation (MINUS, mode, series_1_1, + CONST1_RTX (mode))); + ASSERT_RTX_EQ (series_1_1, + simplify_binary_operation (MINUS, mode, CONST1_RTX (mode), + series_0_m1)); + ASSERT_RTX_EQ (series_1_1, + simplify_binary_operation (MINUS, mode, series_1_3, + series_0_2)); + + /* Test MULT between constant vectors. */ + rtx vec2 = gen_const_vec_duplicate (mode, scalar2); + rtx vec3 = gen_const_vec_duplicate (mode, scalar3); + rtx scalar9 = gen_int_mode (9, inner_mode); + rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9); + ASSERT_RTX_EQ (series_0_2, + simplify_binary_operation (MULT, mode, series_0_1, vec2)); + ASSERT_RTX_EQ (series_3_9, + simplify_binary_operation (MULT, mode, vec3, series_1_3)); + if (!GET_MODE_NUNITS (mode).is_constant ()) + ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1, + series_0_1)); + + /* Test ASHIFT between constant vectors. */ + ASSERT_RTX_EQ (series_0_2, + simplify_binary_operation (ASHIFT, mode, series_0_1, + CONST1_RTX (mode))); + if (!GET_MODE_NUNITS (mode).is_constant ()) + ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode), + series_0_1)); } /* Verify simplify_merge_mask works correctly. */ diff --git a/gcc/tree-vector-builder.c b/gcc/tree-vector-builder.c index f31dc13b4aa..d02fb950c5e 100644 --- a/gcc/tree-vector-builder.c +++ b/gcc/tree-vector-builder.c @@ -24,103 +24,6 @@ along with GCC; see the file COPYING3. If not see #include "fold-const.h" #include "tree-vector-builder.h" -/* Try to start building a new vector of type TYPE that holds the result of - a unary operation on VECTOR_CST T. ALLOW_STEPPED_P is true if the - operation can handle stepped encodings directly, without having to - expand the full sequence. - - Return true if the operation is possible, which it always is when - ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ - -bool -tree_vector_builder::new_unary_operation (tree type, tree t, - bool allow_stepped_p) -{ - poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type); - gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t)))); - unsigned int npatterns = VECTOR_CST_NPATTERNS (t); - unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t); - if (!allow_stepped_p && nelts_per_pattern > 2) - { - if (!full_nelts.is_constant ()) - return false; - npatterns = full_nelts.to_constant (); - nelts_per_pattern = 1; - } - new_vector (type, npatterns, nelts_per_pattern); - return true; -} - -/* Try to start building a new vector of type TYPE that holds the result of - a binary operation on VECTOR_CSTs T1 and T2. ALLOW_STEPPED_P is true if - the operation can handle stepped encodings directly, without having to - expand the full sequence. - - Return true if the operation is possible. Leave the builder unchanged - otherwise. */ - -bool -tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2, - bool allow_stepped_p) -{ - poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type); - gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))) - && known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)))); - /* Conceptually we split the patterns in T1 and T2 until we have - an equal number for both. Each split pattern requires the same - number of elements per pattern as the original. E.g. splitting: - - { 1, 2, 3, ... } - - into two gives: - - { 1, 3, 5, ... } - { 2, 4, 6, ... } - - while splitting: - - { 1, 0, ... } - - into two gives: - - { 1, 0, ... } - { 0, 0, ... }. */ - unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), - VECTOR_CST_NPATTERNS (t2)); - unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), - VECTOR_CST_NELTS_PER_PATTERN (t2)); - if (!allow_stepped_p && nelts_per_pattern > 2) - { - if (!full_nelts.is_constant ()) - return false; - npatterns = full_nelts.to_constant (); - nelts_per_pattern = 1; - } - new_vector (type, npatterns, nelts_per_pattern); - return true; -} - -/* Return the number of elements that the caller needs to operate on in - order to handle a binary operation on VECTOR_CSTs T1 and T2. This static - function is used instead of new_binary_operation if the result of the - operation is not a VECTOR_CST. */ - -unsigned int -tree_vector_builder::binary_encoded_nelts (tree t1, tree t2) -{ - poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)); - gcc_assert (known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)))); - /* See new_binary_operation for details. */ - unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), - VECTOR_CST_NPATTERNS (t2)); - unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), - VECTOR_CST_NELTS_PER_PATTERN (t2)); - unsigned HOST_WIDE_INT const_nelts; - if (nelts.is_constant (&const_nelts)) - return MIN (npatterns * nelts_per_pattern, const_nelts); - return npatterns * nelts_per_pattern; -} - /* Return a vector element with the value BASE + FACTOR * STEP. */ tree diff --git a/gcc/tree-vector-builder.h b/gcc/tree-vector-builder.h index 0e36cd17139..6a4cf6fbe01 100644 --- a/gcc/tree-vector-builder.h +++ b/gcc/tree-vector-builder.h @@ -24,10 +24,11 @@ along with GCC; see the file COPYING3. If not see /* This class is used to build VECTOR_CSTs from a sequence of elements. See vector_builder for more details. */ -class tree_vector_builder : public vector_builder +class tree_vector_builder : public vector_builder { - typedef vector_builder parent; - friend class vector_builder; + typedef vector_builder parent; + friend class vector_builder; public: tree_vector_builder () : m_type (0) {} @@ -37,10 +38,6 @@ public: tree type () const { return m_type; } void new_vector (tree, unsigned int, unsigned int); - bool new_unary_operation (tree, tree, bool); - bool new_binary_operation (tree, tree, tree, bool); - - static unsigned int binary_encoded_nelts (tree, tree); private: bool equal_p (const_tree, const_tree) const; @@ -51,6 +48,15 @@ private: bool can_elide_p (const_tree) const; void note_representative (tree *, tree); + static poly_uint64 shape_nelts (const_tree t) + { return TYPE_VECTOR_SUBPARTS (t); } + static poly_uint64 nelts_of (const_tree t) + { return VECTOR_CST_NELTS (t); } + static unsigned int npatterns_of (const_tree t) + { return VECTOR_CST_NPATTERNS (t); } + static unsigned int nelts_per_pattern_of (const_tree t) + { return VECTOR_CST_NELTS_PER_PATTERN (t); } + tree m_type; }; diff --git a/gcc/vector-builder.h b/gcc/vector-builder.h index aac8a875a58..37911ac693d 100644 --- a/gcc/vector-builder.h +++ b/gcc/vector-builder.h @@ -45,8 +45,11 @@ along with GCC; see the file COPYING3. If not see variable-length vectors. finalize () then canonicalizes the encoding to a simpler form if possible. - The derived class Derived provides this functionality for specific Ts. - Derived needs to provide the following interface: + Shape is the type that specifies the number of elements in the vector + and (where relevant) the type of each element. + + The derived class Derived provides the functionality of this class + for specific Ts. Derived needs to provide the following interface: bool equal_p (T elt1, T elt2) const; @@ -82,9 +85,30 @@ along with GCC; see the file COPYING3. If not see Record that ELT2 is being elided, given that ELT1_PTR points to the last encoded element for the containing pattern. This is - again provided for TREE_OVERFLOW handling. */ + again provided for TREE_OVERFLOW handling. + + static poly_uint64 shape_nelts (Shape shape); + + Return the number of elements in SHAPE. + + The class provides additional functionality for the case in which + T can describe a vector constant as well as an individual element. + This functionality requires: + + static poly_uint64 nelts_of (T x); + + Return the number of elements in vector constant X. -template + static unsigned int npatterns_of (T x); + + Return the number of patterns used to encode vector constant X. + + static unsigned int nelts_per_pattern_of (T x); + + Return the number of elements used to encode each pattern + in vector constant X. */ + +template class vector_builder : public auto_vec { public: @@ -101,8 +125,13 @@ public: bool operator == (const Derived &) const; bool operator != (const Derived &x) const { return !operator == (x); } + bool new_unary_operation (Shape, T, bool); + bool new_binary_operation (Shape, T, T, bool); + void finalize (); + static unsigned int binary_encoded_nelts (T, T); + protected: void new_vector (poly_uint64, unsigned int, unsigned int); void reshape (unsigned int, unsigned int); @@ -121,16 +150,16 @@ private: unsigned int m_nelts_per_pattern; }; -template +template inline const Derived * -vector_builder::derived () const +vector_builder::derived () const { return static_cast (this); } -template +template inline -vector_builder::vector_builder () +vector_builder::vector_builder () : m_full_nelts (0), m_npatterns (0), m_nelts_per_pattern (0) @@ -140,18 +169,18 @@ vector_builder::vector_builder () starts with these explicitly-encoded elements and may contain additional elided elements. */ -template +template inline unsigned int -vector_builder::encoded_nelts () const +vector_builder::encoded_nelts () const { return m_npatterns * m_nelts_per_pattern; } /* Return true if every element of the vector is explicitly encoded. */ -template +template inline bool -vector_builder::encoded_full_vector_p () const +vector_builder::encoded_full_vector_p () const { return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); } @@ -159,11 +188,11 @@ vector_builder::encoded_full_vector_p () const /* Start building a vector that has FULL_NELTS elements. Initially encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ -template +template void -vector_builder::new_vector (poly_uint64 full_nelts, - unsigned int npatterns, - unsigned int nelts_per_pattern) +vector_builder::new_vector (poly_uint64 full_nelts, + unsigned int npatterns, + unsigned int nelts_per_pattern) { m_full_nelts = full_nelts; m_npatterns = npatterns; @@ -175,9 +204,9 @@ vector_builder::new_vector (poly_uint64 full_nelts, /* Return true if this vector and OTHER have the same elements and are encoded in the same way. */ -template +template bool -vector_builder::operator == (const Derived &other) const +vector_builder::operator == (const Derived &other) const { if (maybe_ne (m_full_nelts, other.m_full_nelts) || m_npatterns != other.m_npatterns @@ -195,9 +224,9 @@ vector_builder::operator == (const Derived &other) const /* Return the value of vector element I, which might or might not be encoded explicitly. */ -template +template T -vector_builder::elt (unsigned int i) const +vector_builder::elt (unsigned int i) const { /* First handle elements that are already present in the underlying vector, regardless of whether they're part of the encoding or not. */ @@ -225,12 +254,118 @@ vector_builder::elt (unsigned int i) const derived ()->step (prev, final)); } +/* Try to start building a new vector of shape SHAPE that holds the result of + a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the + operation can handle stepped encodings directly, without having to expand + the full sequence. + + Return true if the operation is possible, which it always is when + ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ + +template +bool +vector_builder::new_unary_operation (Shape shape, T vec, + bool allow_stepped_p) +{ + poly_uint64 full_nelts = Derived::shape_nelts (shape); + gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec))); + unsigned int npatterns = Derived::npatterns_of (vec); + unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec); + if (!allow_stepped_p && nelts_per_pattern > 2) + { + if (!full_nelts.is_constant ()) + return false; + npatterns = full_nelts.to_constant (); + nelts_per_pattern = 1; + } + derived ()->new_vector (shape, npatterns, nelts_per_pattern); + return true; +} + +/* Try to start building a new vector of shape SHAPE that holds the result of + a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is + true if the operation can handle stepped encodings directly, without + having to expand the full sequence. + + Return true if the operation is possible. Leave the builder unchanged + otherwise. */ + +template +bool +vector_builder::new_binary_operation (Shape shape, + T vec1, T vec2, + bool allow_stepped_p) +{ + poly_uint64 full_nelts = Derived::shape_nelts (shape); + gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1)) + && known_eq (full_nelts, Derived::nelts_of (vec2))); + /* Conceptually we split the patterns in VEC1 and VEC2 until we have + an equal number for both. Each split pattern requires the same + number of elements per pattern as the original. E.g. splitting: + + { 1, 2, 3, ... } + + into two gives: + + { 1, 3, 5, ... } + { 2, 4, 6, ... } + + while splitting: + + { 1, 0, ... } + + into two gives: + + { 1, 0, ... } + { 0, 0, ... }. */ + unsigned int npatterns + = least_common_multiple (Derived::npatterns_of (vec1), + Derived::npatterns_of (vec2)); + unsigned int nelts_per_pattern + = MAX (Derived::nelts_per_pattern_of (vec1), + Derived::nelts_per_pattern_of (vec2)); + if (!allow_stepped_p && nelts_per_pattern > 2) + { + if (!full_nelts.is_constant ()) + return false; + npatterns = full_nelts.to_constant (); + nelts_per_pattern = 1; + } + derived ()->new_vector (shape, npatterns, nelts_per_pattern); + return true; +} + +/* Return the number of elements that the caller needs to operate on in + order to handle a binary operation on vector constants VEC1 and VEC2. + This static function is used instead of new_binary_operation if the + result of the operation is not a constant vector. */ + +template +unsigned int +vector_builder::binary_encoded_nelts (T vec1, T vec2) +{ + poly_uint64 nelts = Derived::nelts_of (vec1); + gcc_assert (known_eq (nelts, Derived::nelts_of (vec2))); + /* See new_binary_operation for details. */ + unsigned int npatterns + = least_common_multiple (Derived::npatterns_of (vec1), + Derived::npatterns_of (vec2)); + unsigned int nelts_per_pattern + = MAX (Derived::nelts_per_pattern_of (vec1), + Derived::nelts_per_pattern_of (vec2)); + unsigned HOST_WIDE_INT const_nelts; + if (nelts.is_constant (&const_nelts)) + return MIN (npatterns * nelts_per_pattern, const_nelts); + return npatterns * nelts_per_pattern; +} + /* Return the number of leading duplicate elements in the range [START:END:STEP]. The value is always at least 1. */ -template +template unsigned int -vector_builder::count_dups (int start, int end, int step) const +vector_builder::count_dups (int start, int end, + int step) const { gcc_assert ((end - start) % step == 0); @@ -245,10 +380,10 @@ vector_builder::count_dups (int start, int end, int step) const /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, but without changing the underlying vector. */ -template +template void -vector_builder::reshape (unsigned int npatterns, - unsigned int nelts_per_pattern) +vector_builder::reshape (unsigned int npatterns, + unsigned int nelts_per_pattern) { unsigned int old_encoded_nelts = encoded_nelts (); unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; @@ -268,11 +403,11 @@ vector_builder::reshape (unsigned int npatterns, /* Return true if elements [START, END) contain a repeating sequence of STEP elements. */ -template +template bool -vector_builder::repeating_sequence_p (unsigned int start, - unsigned int end, - unsigned int step) +vector_builder::repeating_sequence_p (unsigned int start, + unsigned int end, + unsigned int step) { for (unsigned int i = start; i < end - step; ++i) if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) @@ -283,11 +418,11 @@ vector_builder::repeating_sequence_p (unsigned int start, /* Return true if elements [START, END) contain STEP interleaved linear series. */ -template +template bool -vector_builder::stepped_sequence_p (unsigned int start, - unsigned int end, - unsigned int step) +vector_builder::stepped_sequence_p (unsigned int start, + unsigned int end, + unsigned int step) { if (!derived ()->allow_steps_p ()) return false; @@ -316,9 +451,9 @@ vector_builder::stepped_sequence_p (unsigned int start, /* Try to change the number of encoded patterns to NPATTERNS, returning true on success. */ -template +template bool -vector_builder::try_npatterns (unsigned int npatterns) +vector_builder::try_npatterns (unsigned int npatterns) { if (m_nelts_per_pattern == 1) { @@ -369,9 +504,9 @@ vector_builder::try_npatterns (unsigned int npatterns) /* Replace the current encoding with the canonical form. */ -template +template void -vector_builder::finalize () +vector_builder::finalize () { /* The encoding requires the same number of elements to come from each pattern. */