/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "insn-config.h"
#include "tree-ssa-dce.h"
#include "recog.h"		/* FIXME: for insn_data */
static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
/* Return the number of elements in a vector type TYPE that we have
   already decided needs to be expanded piecewise.  We don't support
   this kind of expansion for variable-length vectors, since we should
   always check for target support before introducing uses of those.  */

static unsigned int
nunits_for_known_piecewise_op (const_tree type)
{
  return TYPE_VECTOR_SUBPARTS (type).to_constant ();
}
/* Return true if TYPE1 has more elements than TYPE2, where either
   type may be a vector or a scalar.  */

static bool
subparts_gt (tree type1, tree type2)
{
  poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
  poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
  return known_gt (n1, n2);
}
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every WIDTH bits to fit TYPE's precision.  */
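/* For example: with 64-bit HOST_WIDE_INT, WIDTH == 8 and VALUE == 0x7f,
   the mask below is 0xff and ~0 / 0xff * 0x7f == 0x7f7f7f7f7f7f7f7f,
   i.e. VALUE copied into every byte; wide_int::from_array then
   truncates that pattern to TYPE's precision.  */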
static tree
build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
{
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
    / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  vector_last_nunits = nunits;
  vector_last_type = build_vector_type (vector_inner_type, nunits);
  return vector_last_type;
}
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);
static tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (t);
      if (is_gimple_assign (def_stmt)
	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
	      || (bitpos
		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
	t = gimple_assign_rhs1 (def_stmt);
    }
  if (bitpos)
    {
      if (TREE_CODE (type) == BOOLEAN_TYPE)
	{
	  tree itype
	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
					bitsize, bitpos);
	  return gimplify_build2 (gsi, NE_EXPR, type, field,
				  build_zero_cst (itype));
	}
      else
	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
    }
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code,
	  tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements; the returned
   expression is of signed integer type with the size equal to the
   size of INNER_TYPE.  */

static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           ((a ^ b) & 0x80808080)

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           ((a ^ ~b) & 0x80808080)

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
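/* Worked example with two 8-bit items in a 16-bit word: for
   a = 0x8001 and b = 0x0180, (a & 0x7f7f) + (b & 0x7f7f) == 0x0101
   and (a ^ b) & 0x8080 == 0x8080, so the result is
   0x0101 ^ 0x8080 == 0x8181, i.e. the per-byte sums {0x81, 0x81}
   with no carry crossing a byte boundary.  */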
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  unsigned int width = vector_element_bits (TREE_TYPE (a));
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, width, max >> 1);
  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED,
	   tree type ATTRIBUTE_UNUSED)
{
  unsigned int width = vector_element_bits (TREE_TYPE (b));
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, width, max >> 1);
  high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */

static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code,
			 tree ret_type = NULL_TREE)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = nunits_for_known_piecewise_op (type);
  int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (ret_type
      || types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  if (!ret_type)
    ret_type = type;
  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code,
		       ret_type);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (ret_type, v);
}
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
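/* For instance, a bitwise AND of two 16-byte vectors on a target with
   64-bit words but no suitable vector instruction can be done as two
   64-bit scalar ANDs, while a 4-byte vector fits in a single SImode
   operation.  */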
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b, enum tree_code code)
{
  tree result, compute_type;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      scalar_int_mode mode
	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}
/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
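/* E.g. with 8-bit elements and a 64-bit word, parts_per_word below is
   8, so a V8QI addition can use the word-at-a-time twiddling above;
   with 32-bit elements only two items fit in a word and the expansion
   stays piecewise.  */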
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = BITS_PER_WORD / vector_element_bits (type);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && nunits_for_known_piecewise_op (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}
static bool
expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
/* Try to expand vector comparison expression OP0 CODE OP1 by
   querying optab if the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */

static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code,
			  bitmap dce_ssa_names)
{
  tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
  use_operand_p use_p;
  imm_use_iterator iterator;
  bool vec_cond_expr_only = true;

  /* As seen in PR95830, we should not expand comparisons that are only
     feeding a VEC_COND_EXPR statement.  */
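  /* That is (illustrative GIMPLE):
       mask_6 = a_4 < b_5;
       _7 = VEC_COND_EXPR <mask_6, c_2, d_3>;
     leaves the comparison alone here; it is handled when the
     VEC_COND_EXPR itself is expanded.  */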
  auto_vec<gimple *> uses;
  FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
    uses.safe_push (USE_STMT (use_p));

  for (unsigned i = 0; i < uses.length (); i ++)
    {
      gassign *use = dyn_cast<gassign *> (uses[i]);
      if (use != NULL
	  && gimple_assign_rhs_code (use) == VEC_COND_EXPR
	  && gimple_assign_rhs1 (use) == lhs)
	{
	  gimple_stmt_iterator it = gsi_for_stmt (use);
	  if (!expand_vector_condition (&it, dce_ssa_names))
	    {
	      vec_cond_expr_only = false;
	      break;
	    }
	}
      else
	{
	  vec_cond_expr_only = false;
	  break;
	}
    }

  if (!uses.is_empty () && vec_cond_expr_only)
    return NULL_TREE;

  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
    {
      if (VECTOR_BOOLEAN_TYPE_P (type)
	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		       TYPE_VECTOR_SUBPARTS (type)
		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
						(TREE_TYPE (type)))))
	{
	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
	  tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
	  tree index = bitsize_int (0);
	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
	  tree ret_type = build_nonstandard_integer_type (prec, 1);
	  tree ret_inner_type = boolean_type_node;
	  int i;
	  location_t loc = gimple_location (gsi_stmt (*gsi));
	  t = build_zero_cst (ret_type);

	  if (TYPE_PRECISION (ret_inner_type) != 1)
	    ret_inner_type = build_nonstandard_integer_type (1, 1);
	  warning_at (loc, OPT_Wvector_operation_performance,
		      "vector operation will be expanded piecewise");
	  for (i = 0; i < nunits;
	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
	    {
	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
					 index);
	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
					 index);
	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
				   bitsize_int (i));
	    }
	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
	}
      else
	t = expand_vector_piecewise (gsi, do_compare, type,
				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
				     code);
    }
  else
    t = NULL_TREE;

  return t;
}
/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */

static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = nunits_for_known_piecewise_op (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree_vector_builder vec (type, nunits, 1);
      for (i = 0; i < nunits; i++)
	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
    }

  return NULL_TREE;
}
/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  bool use_abs_op1 = false;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = nunits_for_known_piecewise_op (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;
  /* Analysis phase.  Determine if all op1 elements are either power
     of two and it is possible to expand it using shifts (or for remainder
     using masking).  Additionally compute the multiplicative constants
     and pre and post shifts if the division is to be expanded using
     widening or high part multiplication plus shifts.  */
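  /* E.g. an unsigned division by the constant vector {4, 4, 4, 4} needs
     no multiplier at all: the power-of-two path below emits
     q = op0 >> 2 for TRUNC_DIV_EXPR and r = op0 & 3 for
     TRUNC_MOD_EXPR.  */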
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    {
	      d = abs_d;
	      use_abs_op1 = true;
	    }
	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
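	  /* E.g. for prec == 32 and division by 4 (shifts[i] == 2), the
	     addend is op0 < 0 ? 3 : 0; adding it before the arithmetic
	     right shift by 2 turns the shift's rounding towards negative
	     infinity into the round-towards-zero behavior that
	     TRUNC_DIV_EXPR requires.  */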
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      tree zero, cst, mask_type, mask;
	      gimple *stmt, *cond;

	      mask_type = truth_type_for (type);
	      zero = build_zero_cst (type);
	      mask = make_ssa_name (mask_type);
	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
	      tree_vector_builder vec (type, nunits, 1);
	      for (i = 0; i < nunits; i++)
		vec.quick_push (build_int_cst (TREE_TYPE (type),
					       (HOST_WIDE_INT_1U
						<< shifts[i]) - 1));
	      cst = vec.build ();
	      addend = make_ssa_name (type);
	      stmt
		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  tree_vector_builder vec (type, nunits, 1);
	  for (i = 0; i < nunits; i++)
	    vec.quick_push (build_int_cst (TREE_TYPE (type),
					   (HOST_WIDE_INT_1U
					    << shifts[i]) - 1));
	  mask = vec.build ();
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
      return NULL_TREE;
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  tree_vector_builder vec (type, nunits, 1);
  for (i = 0; i < nunits; i++)
    vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
  mulcst = vec.build ();

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;

    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
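      /* Here t4 is 0 for nonnegative oprnd0 and -1 otherwise (arithmetic
	 shift by prec - 1), so the final subtraction adds back 1 exactly
	 when a negative dividend needs the truncation correction, and
	 the (mode & 1) variant flips the sign for negative divisors.  */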
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  if (use_abs_op1)
    {
      tree_vector_builder elts;
      if (!elts.new_unary_operation (type, op1, false))
	return NULL_TREE;
      unsigned int count = elts.encoded_nelts ();
      for (unsigned int i = 0; i < count; ++i)
	{
	  tree elem1 = VECTOR_CST_ELT (op1, i);

	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
	  if (elt == NULL_TREE)
	    return NULL_TREE;
	  elts.quick_push (elt);
	}
      op1 = elts.build ();
    }
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static bool
expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  bool a_is_scalar_bitmask = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree width = vector_element_bits_tree (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  location_t loc = gimple_location (gsi_stmt (*gsi));
  tree_code code = TREE_CODE (a);
  gassign *assign = NULL;

  if (code == SSA_NAME)
    {
      assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
      if (assign != NULL
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
	{
	  a_is_comparison = true;
	  a1 = gimple_assign_rhs1 (assign);
	  a2 = gimple_assign_rhs2 (assign);
	  code = gimple_assign_rhs_code (assign);
	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
	}
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
    {
      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
      return true;
    }
  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
		   TYPE_VECTOR_SUBPARTS (type)
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, code, type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return true;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  if (!a_is_comparison
      && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
      && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
      && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
					   (TREE_TYPE (TREE_TYPE (a))))))
    {
      a_is_scalar_bitmask = true;
      int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
      tree atype = build_nonstandard_integer_type (prec, 1);
      a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
    }

  int nunits = nunits_for_known_piecewise_op (type);
  vec_alloc (v, nunits);
  for (int i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (code, cond_type, aa1, aa2);
	}
      else if (a_is_scalar_bitmask)
	{
	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
				    a, wide_int_to_tree (TREE_TYPE (a), w));
	  aa = fold_build2 (NE_EXPR, boolean_type_node, result,
			    build_zero_cst (TREE_TYPE (a)));
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));

  if (a_is_comparison)
    bitmap_set_bit (dce_ssa_names,
		    SSA_NAME_VERSION (gimple_assign_lhs (assign)));

  return false;
}
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code,
			 bitmap dce_ssa_names)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);

      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
					   dce_ssa_names);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}
/* Optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */
static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j;
  unsigned HOST_WIDE_INT nelts;
  bool all_same = true;
  constructor_elt *elt;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
      || nelts <= 2
      || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  tree_vector_builder cst (type, nelts, 1);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      tree elt = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
			     elt, gimple_assign_rhs2 (g));
	  if (elt == NULL_TREE
	      || TREE_CODE (elt) != INTEGER_CST
	      || TREE_OVERFLOW (elt))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
      cst.quick_push (elt);
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   cst.build ());
  gsi_replace (gsi, g, false);
}
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  poly_int64 best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else if (inner_mode == BImode)
    mode = MIN_MODE_VECTOR_BOOL;
  else
    mode = MIN_MODE_VECTOR_INT;

  FOR_EACH_MODE_FROM (mode, mode)
    if (GET_MODE_INNER (mode) == inner_mode
	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, or a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL new temporary variable
   will be created.  */

static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = nunits_for_known_piecewise_op (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = vector_element_bits_tree (vect_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
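/* E.g. VEC_PERM_EXPR <v0, v0, {1, 0, 3, 2}> lowers to the constructor
   {v0[1], v0[0], v0[3], v0[2]}, swapping each pair of adjacent
   elements.  */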
static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned HOST_WIDE_INT elements;
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
    return;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  vec_perm_builder sel_int;

  if (TREE_CODE (mask) == VECTOR_CST
      && tree_to_vec_perm_builder (&sel_int, mask))
    {
      vec_perm_indices indices (sel_int, 2, elements);
      if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && maybe_ne (indices[0], 0)
	  && known_lt (poly_uint64 (indices[0]), elements))
	{
	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
	  if (!ok_p)
	    {
	      for (i = 1; i < elements; ++i)
		{
		  poly_uint64 actual = indices[i];
		  poly_uint64 expected = i + indices[0];
		  /* Indices into the second vector are all equivalent.  */
		  if (maybe_lt (actual, elements)
		      ? maybe_ne (actual, expected)
		      : maybe_lt (expected, elements))
		    break;
		}
	      ok_p = i == elements;
	    }
	  if (ok_p)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
      /* And similarly vec_shl pattern.  */
      if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec0) == VECTOR_CST
	  && initializer_zerop (vec0))
	{
	  unsigned int first = 0;
	  for (i = 0; i < elements; ++i)
	    if (known_eq (poly_uint64 (indices[i]), elements))
	      {
		if (i == 0 || first)
		  break;
		first = i;
	      }
	    else if (first
		     ? maybe_ne (poly_uint64 (indices[i]),
				 elements + i - first)
		     : maybe_ge (poly_uint64 (indices[i]), elements))
	      break;
	  if (first && i == elements)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == VEC_DUPLICATE_EXPR
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}
/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && subparts_gt (compute_type, vector_compute_type)
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}
static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}
/* Expand a vector COND_EXPR to scalars, piecewise.  */

static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (!VECTOR_MODE_P (compute_mode))
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
   lowering.  If INNER_TYPE is not a vector type, this is a scalar
   fallback.  */

static tree
do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
		   tree decl, tree bitpos, tree bitsize,
		   enum tree_code code, tree type)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (!VECTOR_TYPE_P (inner_type))
    return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
  if (code == CALL_EXPR)
    {
      gimple *g = gimple_build_call (decl, 1, a);
      tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
      gimple_call_set_lhs (g, lhs);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      return lhs;
    }
  else
    {
      tree outer_type = build_vector_type (TREE_TYPE (type),
					   TYPE_VECTOR_SUBPARTS (inner_type));
      return gimplify_build1 (gsi, code, outer_type, a);
    }
}
/* Similarly, but for narrowing conversion.  */

static tree
do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
			  tree, tree bitpos, tree, enum tree_code code,
			  tree type)
{
  tree itype = build_vector_type (TREE_TYPE (inner_type),
				  exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
					     2));
  tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
  tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
			     int_const_binop (PLUS_EXPR, bitpos,
					      TYPE_SIZE (itype)));
  tree outer_type = build_vector_type (TREE_TYPE (type),
				       TYPE_VECTOR_SUBPARTS (inner_type));
  return gimplify_build2 (gsi, code, outer_type, b, c);
}
1771 /* Expand VEC_CONVERT ifn call. */
1774 expand_vector_conversion (gimple_stmt_iterator
*gsi
)
1776 gimple
*stmt
= gsi_stmt (*gsi
);
1778 tree lhs
= gimple_call_lhs (stmt
);
1779 tree arg
= gimple_call_arg (stmt
, 0);
1780 tree ret_type
= TREE_TYPE (lhs
);
1781 tree arg_type
= TREE_TYPE (arg
);
1782 tree new_rhs
, compute_type
= TREE_TYPE (arg_type
);
1783 enum tree_code code
= NOP_EXPR
;
1784 enum tree_code code1
= ERROR_MARK
;
1785 enum { NARROW
, NONE
, WIDEN
} modifier
= NONE
;
1786 optab optab1
= unknown_optab
;
1788 gcc_checking_assert (VECTOR_TYPE_P (ret_type
) && VECTOR_TYPE_P (arg_type
));
1789 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type
))
1790 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type
)))
1791 code
= FIX_TRUNC_EXPR
;
1792 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type
))
1793 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type
)))
1795 unsigned int ret_elt_bits
= vector_element_bits (ret_type
);
1796 unsigned int arg_elt_bits
= vector_element_bits (arg_type
);
1797 if (ret_elt_bits
< arg_elt_bits
)
1799 else if (ret_elt_bits
> arg_elt_bits
)
1802 if (modifier
== NONE
&& (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
))
1804 if (supportable_convert_operation (code
, ret_type
, arg_type
, &code1
))
1806 g
= gimple_build_assign (lhs
, code1
, arg
);
1807 gsi_replace (gsi
, g
, false);
1810 /* Can't use get_compute_type here, as supportable_convert_operation
1811 doesn't necessarily use an optab and needs two arguments. */
1812 tree vec_compute_type
1813 = type_for_widest_vector_mode (TREE_TYPE (arg_type
), mov_optab
);
1814 if (vec_compute_type
1815 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type
))
1816 && subparts_gt (arg_type
, vec_compute_type
))
1818 unsigned HOST_WIDE_INT nelts
1819 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type
));
1822 tree ret1_type
= build_vector_type (TREE_TYPE (ret_type
), nelts
);
1823 tree arg1_type
= build_vector_type (TREE_TYPE (arg_type
), nelts
);
1824 if (supportable_convert_operation (code
, ret1_type
, arg1_type
,
1827 new_rhs
= expand_vector_piecewise (gsi
, do_vec_conversion
,
1828 ret_type
, arg1_type
, arg
,
1830 g
= gimple_build_assign (lhs
, new_rhs
);
1831 gsi_replace (gsi
, g
, false);
1838 else if (modifier
== NARROW
)
1843 code1
= VEC_PACK_TRUNC_EXPR
;
1844 optab1
= optab_for_tree_code (code1
, arg_type
, optab_default
);
1846 case FIX_TRUNC_EXPR
:
1847 code1
= VEC_PACK_FIX_TRUNC_EXPR
;
1848 /* The signedness is determined from output operand. */
1849 optab1
= optab_for_tree_code (code1
, ret_type
, optab_default
);
1852 code1
= VEC_PACK_FLOAT_EXPR
;
1853 optab1
= optab_for_tree_code (code1
, arg_type
, optab_default
);
1860 compute_type
= get_compute_type (code1
, optab1
, arg_type
);
1861 enum insn_code icode1
;
1862 if (VECTOR_TYPE_P (compute_type
)
1863 && ((icode1
= optab_handler (optab1
, TYPE_MODE (compute_type
)))
1864 != CODE_FOR_nothing
)
1865 && VECTOR_MODE_P (insn_data
[icode1
].operand
[0].mode
))
1868 = build_vector_type (TREE_TYPE (ret_type
),
1869 TYPE_VECTOR_SUBPARTS (compute_type
) * 2);
1870 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (cretd_type
))
1872 if (compute_type
== arg_type
)
1874 new_rhs
= gimplify_build2 (gsi
, code1
, cretd_type
,
1875 arg
, build_zero_cst (arg_type
));
1876 new_rhs
= tree_vec_extract (gsi
, ret_type
, new_rhs
,
1877 TYPE_SIZE (ret_type
),
1879 g
= gimple_build_assign (lhs
, new_rhs
);
1880 gsi_replace (gsi
, g
, false);
1884 = build_vector_type (TREE_TYPE (compute_type
),
1885 TYPE_VECTOR_SUBPARTS (compute_type
) * 2);
1886 if (TYPE_MAIN_VARIANT (dcompute_type
)
1887 == TYPE_MAIN_VARIANT (arg_type
))
1888 new_rhs
= do_vec_narrow_conversion (gsi
, dcompute_type
, arg
,
1889 NULL_TREE
, bitsize_int (0),
1893 new_rhs
= expand_vector_piecewise (gsi
,
1894 do_vec_narrow_conversion
,
1895 arg_type
, dcompute_type
,
1896 arg
, NULL_TREE
, code1
,
1898 g
= gimple_build_assign (lhs
, new_rhs
);
1899 gsi_replace (gsi
, g
, false);
1904 else if (modifier
== WIDEN
)
1906 enum tree_code code2
= ERROR_MARK
;
1907 optab optab2
= unknown_optab
;
1911 code1
= VEC_UNPACK_LO_EXPR
;
1912 code2
= VEC_UNPACK_HI_EXPR
;
1914 case FIX_TRUNC_EXPR
:
1915 code1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
1916 code2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
1919 code1
= VEC_UNPACK_FLOAT_LO_EXPR
;
1920 code2
= VEC_UNPACK_FLOAT_HI_EXPR
;
1925 if (BYTES_BIG_ENDIAN
)
1926 std::swap (code1
, code2
);
1928 if (code
== FIX_TRUNC_EXPR
)
1930 /* The signedness is determined from output operand. */
1931 optab1
= optab_for_tree_code (code1
, ret_type
, optab_default
);
1932 optab2
= optab_for_tree_code (code2
, ret_type
, optab_default
);
1936 optab1
= optab_for_tree_code (code1
, arg_type
, optab_default
);
1937 optab2
= optab_for_tree_code (code2
, arg_type
, optab_default
);
1940 if (optab1
&& optab2
)
1941 compute_type
= get_compute_type (code1
, optab1
, arg_type
);
1943 enum insn_code icode1
, icode2
;
1944 if (VECTOR_TYPE_P (compute_type
)
1945 && ((icode1
= optab_handler (optab1
, TYPE_MODE (compute_type
)))
1946 != CODE_FOR_nothing
)
1947 && ((icode2
= optab_handler (optab2
, TYPE_MODE (compute_type
)))
1948 != CODE_FOR_nothing
)
1949 && VECTOR_MODE_P (insn_data
[icode1
].operand
[0].mode
)
1950 && (insn_data
[icode1
].operand
[0].mode
1951 == insn_data
[icode2
].operand
[0].mode
))
1954 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type
), 2);
1955 tree cretd_type
= build_vector_type (TREE_TYPE (ret_type
), nunits
);
1956 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (cretd_type
))
1958 vec
<constructor_elt
, va_gc
> *v
;
1959 tree part_width
= TYPE_SIZE (compute_type
);
1960 tree index
= bitsize_int (0);
1961 int nunits
= nunits_for_known_piecewise_op (arg_type
);
1962 int delta
= tree_to_uhwi (part_width
) / arg_elt_bits
;
1964 location_t loc
= gimple_location (gsi_stmt (*gsi
));
1966 if (compute_type
!= arg_type
)
1967 warning_at (loc
, OPT_Wvector_operation_performance
,
1968 "vector operation will be expanded piecewise");
1975 vec_alloc (v
, (nunits
+ delta
- 1) / delta
* 2);
	      for (i = 0; i < nunits;
		   i += delta, index = int_const_binop (PLUS_EXPR, index,
							part_width))
		{
		  tree a = arg;
		  if (compute_type != arg_type)
		    a = tree_vec_extract (gsi, compute_type, a, part_width,
					  index);
		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
		  constructor_elt ce = { NULL_TREE, result };
		  v->quick_push (ce);
		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
		  v->quick_push (ce);
		}

	      new_rhs = build_constructor (ret_type, v);
	      g = gimple_build_assign (lhs, new_rhs);
	      gsi_replace (gsi, g, false);
	      return;
	    }
	}
    }
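  /* No single-step strategy applied: fall back to converting each
     element individually and reassembling the result in a CONSTRUCTOR
     (e.g. a V4SI -> V4SF conversion becomes four scalar casts).  */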
  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
				     TREE_TYPE (arg_type), arg,
				     NULL_TREE, code, ret_type);
  g = gimple_build_assign (lhs, new_rhs);
  gsi_replace (gsi, g, false);
}
/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi,
			    bitmap dce_ssa_names)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN.  */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    {
      if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
	expand_vector_conversion (gsi);
      return;
    }
  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi, dce_ssa_names);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }
  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;
  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (!VECTOR_TYPE_P (type)
      || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
    return;
  /* A scalar operation pretending to be a vector one.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && !VECTOR_MODE_P (TYPE_MODE (type))
      && TYPE_MODE (type) != BLKmode
      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
    return;
  /* If the vector operation operates on vectors whose elements are all
     the same, implement it with a scalar operation and a splat if the
     target supports the scalar operation.  */
  tree srhs1, srhs2 = NULL_TREE;
  if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
      && (rhs2 == NULL_TREE
	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
	      && (srhs2 = rhs2))
	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
      /* As we query direct optabs, restrict to non-convert operations.  */
      && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
    {
      op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
      if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type)))
	     != CODE_FOR_nothing)
	{
	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
	  gimple_assign_set_rhs_from_tree (gsi,
					   build_vector_from_val (type, slhs));
	  update_stmt (stmt);
	  return;
	}
    }
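  /* Illustratively, the splat path above rewrites
	 lhs = { x, x, x, x } + { y, y, y, y };
     as
	 s = x + y;
	 lhs = { s, s, s, s };
     provided the target has a scalar addition pattern for the element
     mode.  */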
  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR
      || code == VEC_PACK_FLOAT_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }

  /* For widening/narrowing vector operations, the relevant type is that
     of the arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR
     is handled the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
      || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    {
      /* We do not know how to scalarize those.  */
      return;
    }
  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;

	  if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}
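      /* E.g. a GIMPLE statement  v >> { 3, 3, 3, 3 }  has just been
	 rewritten as  v >> 3, so the scalar-shift optab can be
	 considered below.  */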
      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with a wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (subparts_gt (compute_vtype, compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}
      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with a wider vector type.  */
	      if (subparts_gt (compute_lvtype, compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (subparts_gt (compute_rvtype, compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (subparts_gt (compute_type, compute_rtype))
		compute_type = compute_rtype;
	      if (subparts_gt (compute_type, compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);
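  /* A sketch of the rotate strategy prepared above: with vector shift
     and IOR support in COMPUTE_TYPE,  x r<< y  can later be expanded as
	 (x << y) | (x >> (element_precision - y))
     rather than being split into per-element scalar rotates (the
     synthesis itself happens at expansion time).  */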
  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
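  /* I.e. even without a vector negation pattern, -v may still be
     expanded later as  { 0, ... } - v  when vector subtraction is
     available.  */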
  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
				     dce_ssa_names);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  auto_bitmap dce_ssa_names;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi, dce_ssa_names);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  simple_dce_from_worklist (dce_ssa_names);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector
} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}
namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  (TODO_update_ssa
   | TODO_cleanup_cfg), /* todo_flags_finish */
};
class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa
} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"