/* Lower vector operations to scalar operations.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
33 #include "diagnostic.h"
35 /* Need to include rtl.h, expr.h, etc. for optabs. */
40 static void expand_vector_operations_1 (gimple_stmt_iterator
*);
43 /* Build a constant of type TYPE, made of VALUE's bits replicated
44 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
46 build_replicated_const (tree type
, tree inner_type
, HOST_WIDE_INT value
)
48 int width
= tree_low_cst (TYPE_SIZE (inner_type
), 1);
49 int n
= HOST_BITS_PER_WIDE_INT
/ width
;
50 unsigned HOST_WIDE_INT low
, high
, mask
;
55 if (width
== HOST_BITS_PER_WIDE_INT
)
59 mask
= ((HOST_WIDE_INT
)1 << width
) - 1;
60 low
= (unsigned HOST_WIDE_INT
) ~0 / mask
* (value
& mask
);
63 if (TYPE_PRECISION (type
) < HOST_BITS_PER_WIDE_INT
)
64 low
&= ((HOST_WIDE_INT
)1 << TYPE_PRECISION (type
)) - 1, high
= 0;
65 else if (TYPE_PRECISION (type
) == HOST_BITS_PER_WIDE_INT
)
67 else if (TYPE_PRECISION (type
) == 2 * HOST_BITS_PER_WIDE_INT
)
72 ret
= build_int_cst_wide (type
, low
, high
);
76 static GTY(()) tree vector_inner_type
;
77 static GTY(()) tree vector_last_type
;
78 static GTY(()) int vector_last_nunits
;
/* Return a suitable vector type made of NUNITS units, each of mode
   "word_mode" (the global variable).  */
83 build_word_mode_vector_type (int nunits
)
85 if (!vector_inner_type
)
86 vector_inner_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
87 else if (vector_last_nunits
== nunits
)
89 gcc_assert (TREE_CODE (vector_last_type
) == VECTOR_TYPE
);
90 return vector_last_type
;
93 /* We build a new type, but we canonicalize it nevertheless,
94 because it still saves some memory. */
95 vector_last_nunits
= nunits
;
96 vector_last_type
= type_hash_canon (nunits
,
97 build_vector_type (vector_inner_type
,
99 return vector_last_type
;
102 typedef tree (*elem_op_func
) (gimple_stmt_iterator
*,
103 tree
, tree
, tree
, tree
, tree
, enum tree_code
);
106 tree_vec_extract (gimple_stmt_iterator
*gsi
, tree type
,
107 tree t
, tree bitsize
, tree bitpos
)
110 return gimplify_build3 (gsi
, BIT_FIELD_REF
, type
, t
, bitsize
, bitpos
);
112 return gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, type
, t
);
116 do_unop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
,
117 tree b ATTRIBUTE_UNUSED
, tree bitpos
, tree bitsize
,
120 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
121 return gimplify_build1 (gsi
, code
, inner_type
, a
);
125 do_binop (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
126 tree bitpos
, tree bitsize
, enum tree_code code
)
128 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
129 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
130 return gimplify_build2 (gsi
, code
, inner_type
, a
, b
);
/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of the elements of A and B.

   The returned expression is of a signed integer type whose size is
   equal to the size of INNER_TYPE.  */
140 do_compare (gimple_stmt_iterator
*gsi
, tree inner_type
, tree a
, tree b
,
141 tree bitpos
, tree bitsize
, enum tree_code code
)
145 a
= tree_vec_extract (gsi
, inner_type
, a
, bitsize
, bitpos
);
146 b
= tree_vec_extract (gsi
, inner_type
, b
, bitsize
, bitpos
);
148 comp_type
= build_nonstandard_integer_type
149 (GET_MODE_BITSIZE (TYPE_MODE (inner_type
)), 0);
151 return gimplify_build3 (gsi
, COND_EXPR
, comp_type
,
152 fold_build2 (code
, boolean_type_node
, a
, b
),
153 build_int_cst (comp_type
, -1),
154 build_int_cst (comp_type
, 0));
/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
171 do_plus_minus (gimple_stmt_iterator
*gsi
, tree word_type
, tree a
, tree b
,
172 tree bitpos ATTRIBUTE_UNUSED
, tree bitsize ATTRIBUTE_UNUSED
,
175 tree inner_type
= TREE_TYPE (TREE_TYPE (a
));
176 unsigned HOST_WIDE_INT max
;
177 tree low_bits
, high_bits
, a_low
, b_low
, result_low
, signs
;
179 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
180 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
181 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
183 a
= tree_vec_extract (gsi
, word_type
, a
, bitsize
, bitpos
);
184 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
186 signs
= gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, a
, b
);
187 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
188 if (code
== PLUS_EXPR
)
189 a_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, a
, low_bits
);
192 a_low
= gimplify_build2 (gsi
, BIT_IOR_EXPR
, word_type
, a
, high_bits
);
193 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, signs
);
196 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
197 result_low
= gimplify_build2 (gsi
, code
, word_type
, a_low
, b_low
);
198 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
202 do_negate (gimple_stmt_iterator
*gsi
, tree word_type
, tree b
,
203 tree unused ATTRIBUTE_UNUSED
, tree bitpos ATTRIBUTE_UNUSED
,
204 tree bitsize ATTRIBUTE_UNUSED
,
205 enum tree_code code ATTRIBUTE_UNUSED
)
207 tree inner_type
= TREE_TYPE (TREE_TYPE (b
));
209 tree low_bits
, high_bits
, b_low
, result_low
, signs
;
211 max
= GET_MODE_MASK (TYPE_MODE (inner_type
));
212 low_bits
= build_replicated_const (word_type
, inner_type
, max
>> 1);
213 high_bits
= build_replicated_const (word_type
, inner_type
, max
& ~(max
>> 1));
215 b
= tree_vec_extract (gsi
, word_type
, b
, bitsize
, bitpos
);
217 b_low
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, b
, low_bits
);
218 signs
= gimplify_build1 (gsi
, BIT_NOT_EXPR
, word_type
, b
);
219 signs
= gimplify_build2 (gsi
, BIT_AND_EXPR
, word_type
, signs
, high_bits
);
220 result_low
= gimplify_build2 (gsi
, MINUS_EXPR
, word_type
, high_bits
, b_low
);
221 return gimplify_build2 (gsi
, BIT_XOR_EXPR
, word_type
, result_low
, signs
);
224 /* Expand a vector operation to scalars, by using many operations
225 whose type is the vector type's inner type. */
227 expand_vector_piecewise (gimple_stmt_iterator
*gsi
, elem_op_func f
,
228 tree type
, tree inner_type
,
229 tree a
, tree b
, enum tree_code code
)
231 VEC(constructor_elt
,gc
) *v
;
232 tree part_width
= TYPE_SIZE (inner_type
);
233 tree index
= bitsize_int (0);
234 int nunits
= TYPE_VECTOR_SUBPARTS (type
);
235 int delta
= tree_low_cst (part_width
, 1)
236 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
239 v
= VEC_alloc(constructor_elt
, gc
, (nunits
+ delta
- 1) / delta
);
240 for (i
= 0; i
< nunits
;
241 i
+= delta
, index
= int_const_binop (PLUS_EXPR
, index
, part_width
))
243 tree result
= f (gsi
, inner_type
, a
, b
, index
, part_width
, code
);
244 constructor_elt
*ce
= VEC_quick_push (constructor_elt
, v
, NULL
);
245 ce
->index
= NULL_TREE
;
249 return build_constructor (type
, v
);
252 /* Expand a vector operation to scalars with the freedom to use
253 a scalar integer type, or to use a different size for the items
254 in the vector type. */
256 expand_vector_parallel (gimple_stmt_iterator
*gsi
, elem_op_func f
, tree type
,
260 tree result
, compute_type
;
261 enum machine_mode mode
;
262 int n_words
= tree_low_cst (TYPE_SIZE_UNIT (type
), 1) / UNITS_PER_WORD
;
264 /* We have three strategies. If the type is already correct, just do
265 the operation an element at a time. Else, if the vector is wider than
266 one word, do it a word at a time; finally, if the vector is smaller
267 than one word, do it as a scalar. */
268 if (TYPE_MODE (TREE_TYPE (type
)) == word_mode
)
269 return expand_vector_piecewise (gsi
, f
,
270 type
, TREE_TYPE (type
),
272 else if (n_words
> 1)
274 tree word_type
= build_word_mode_vector_type (n_words
);
275 result
= expand_vector_piecewise (gsi
, f
,
276 word_type
, TREE_TYPE (word_type
),
278 result
= force_gimple_operand_gsi (gsi
, result
, true, NULL
, true,
283 /* Use a single scalar operation with a mode no wider than word_mode. */
284 mode
= mode_for_size (tree_low_cst (TYPE_SIZE (type
), 1), MODE_INT
, 0);
285 compute_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
286 result
= f (gsi
, compute_type
, a
, b
, NULL_TREE
, NULL_TREE
, code
);
292 /* Expand a vector operation to scalars; for integer types we can use
293 special bit twiddling tricks to do the sums a word at a time, using
294 function F_PARALLEL instead of F. These tricks are done only if
295 they can process at least four items, that is, only if the vector
296 holds at least four items and if a word can hold four items. */
298 expand_vector_addition (gimple_stmt_iterator
*gsi
,
299 elem_op_func f
, elem_op_func f_parallel
,
300 tree type
, tree a
, tree b
, enum tree_code code
)
302 int parts_per_word
= UNITS_PER_WORD
303 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type
)), 1);
305 if (INTEGRAL_TYPE_P (TREE_TYPE (type
))
306 && parts_per_word
>= 4
307 && TYPE_VECTOR_SUBPARTS (type
) >= 4)
308 return expand_vector_parallel (gsi
, f_parallel
,
311 return expand_vector_piecewise (gsi
, f
,
312 type
, TREE_TYPE (type
),
/* Check whether all elements of vector VEC are equal and whether the
   number of elements corresponds to the type of VEC.  The function
   returns the first element of the vector, or NULL_TREE if the
   vector is not uniform.  */
321 uniform_vector_p (tree vec
)
326 if (vec
== NULL_TREE
)
329 if (TREE_CODE (vec
) == VECTOR_CST
)
331 els
= TREE_VECTOR_CST_ELTS (vec
);
332 first
= TREE_VALUE (els
);
333 els
= TREE_CHAIN (els
);
335 for (t
= els
; t
; t
= TREE_CHAIN (t
))
336 if (!operand_equal_p (first
, TREE_VALUE (t
), 0))
342 else if (TREE_CODE (vec
) == CONSTRUCTOR
)
344 first
= error_mark_node
;
346 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec
), i
, t
)
353 if (!operand_equal_p (first
, t
, 0))
356 if (i
!= TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec
)))
/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to see whether the following expression:
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
370 expand_vector_comparison (gimple_stmt_iterator
*gsi
, tree type
, tree op0
,
371 tree op1
, enum tree_code code
)
374 if (! expand_vec_cond_expr_p (type
, TREE_TYPE (op0
)))
375 t
= expand_vector_piecewise (gsi
, do_compare
, type
,
376 TREE_TYPE (TREE_TYPE (op0
)), op0
, op1
, code
);
384 expand_vector_operation (gimple_stmt_iterator
*gsi
, tree type
, tree compute_type
,
385 gimple assign
, enum tree_code code
)
387 enum machine_mode compute_mode
= TYPE_MODE (compute_type
);
389 /* If the compute mode is not a vector mode (hence we are not decomposing
390 a BLKmode vector to smaller, hardware-supported vectors), we may want
391 to expand the operations in parallel. */
392 if (GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_INT
393 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FLOAT
394 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_FRACT
395 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UFRACT
396 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_ACCUM
397 && GET_MODE_CLASS (compute_mode
) != MODE_VECTOR_UACCUM
)
402 if (!TYPE_OVERFLOW_TRAPS (type
))
403 return expand_vector_addition (gsi
, do_binop
, do_plus_minus
, type
,
404 gimple_assign_rhs1 (assign
),
405 gimple_assign_rhs2 (assign
), code
);
409 if (!TYPE_OVERFLOW_TRAPS (type
))
410 return expand_vector_addition (gsi
, do_unop
, do_negate
, type
,
411 gimple_assign_rhs1 (assign
),
418 return expand_vector_parallel (gsi
, do_binop
, type
,
419 gimple_assign_rhs1 (assign
),
420 gimple_assign_rhs2 (assign
), code
);
423 return expand_vector_parallel (gsi
, do_unop
, type
,
424 gimple_assign_rhs1 (assign
),
441 tree rhs1
= gimple_assign_rhs1 (assign
);
442 tree rhs2
= gimple_assign_rhs2 (assign
);
444 return expand_vector_comparison (gsi
, type
, rhs1
, rhs2
, code
);
450 if (TREE_CODE_CLASS (code
) == tcc_unary
)
451 return expand_vector_piecewise (gsi
, do_unop
, type
, compute_type
,
452 gimple_assign_rhs1 (assign
),
455 return expand_vector_piecewise (gsi
, do_binop
, type
, compute_type
,
456 gimple_assign_rhs1 (assign
),
457 gimple_assign_rhs2 (assign
), code
);
460 /* Return a type for the widest vector mode whose components are of mode
461 INNER_MODE, or NULL_TREE if none is found.
462 SATP is true for saturating fixed-point types. */
465 type_for_widest_vector_mode (enum machine_mode inner_mode
, optab op
, int satp
)
467 enum machine_mode best_mode
= VOIDmode
, mode
;
470 if (SCALAR_FLOAT_MODE_P (inner_mode
))
471 mode
= MIN_MODE_VECTOR_FLOAT
;
472 else if (SCALAR_FRACT_MODE_P (inner_mode
))
473 mode
= MIN_MODE_VECTOR_FRACT
;
474 else if (SCALAR_UFRACT_MODE_P (inner_mode
))
475 mode
= MIN_MODE_VECTOR_UFRACT
;
476 else if (SCALAR_ACCUM_MODE_P (inner_mode
))
477 mode
= MIN_MODE_VECTOR_ACCUM
;
478 else if (SCALAR_UACCUM_MODE_P (inner_mode
))
479 mode
= MIN_MODE_VECTOR_UACCUM
;
481 mode
= MIN_MODE_VECTOR_INT
;
483 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
484 if (GET_MODE_INNER (mode
) == inner_mode
485 && GET_MODE_NUNITS (mode
) > best_nunits
486 && optab_handler (op
, mode
) != CODE_FOR_nothing
)
487 best_mode
= mode
, best_nunits
= GET_MODE_NUNITS (mode
);
489 if (best_mode
== VOIDmode
)
493 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */
494 if (ALL_FIXED_POINT_MODE_P (best_mode
))
495 return lang_hooks
.types
.type_for_mode (best_mode
, satp
);
497 return lang_hooks
.types
.type_for_mode (best_mode
, 1);
/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  When PTMPVEC is NULL, a new temporary variable
   is created.  */
513 vector_element (gimple_stmt_iterator
*gsi
, tree vect
, tree idx
, tree
*ptmpvec
)
515 tree vect_type
, vect_elt_type
;
519 bool need_asgn
= true;
520 unsigned int elements
;
522 vect_type
= TREE_TYPE (vect
);
523 vect_elt_type
= TREE_TYPE (vect_type
);
524 elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
526 if (TREE_CODE (idx
) == INTEGER_CST
)
528 unsigned HOST_WIDE_INT index
;
530 /* Given that we're about to compute a binary modulus,
531 we don't care about the high bits of the value. */
532 index
= TREE_INT_CST_LOW (idx
);
533 if (!host_integerp (idx
, 1) || index
>= elements
)
535 index
&= elements
- 1;
536 idx
= build_int_cst (TREE_TYPE (idx
), index
);
539 if (TREE_CODE (vect
) == VECTOR_CST
)
542 tree vals
= TREE_VECTOR_CST_ELTS (vect
);
543 for (i
= 0; vals
; vals
= TREE_CHAIN (vals
), ++i
)
545 return TREE_VALUE (vals
);
546 return build_zero_cst (vect_elt_type
);
548 else if (TREE_CODE (vect
) == CONSTRUCTOR
)
553 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect
), i
, elt_i
, elt_v
)
554 if (operand_equal_p (elt_i
, idx
, 0))
556 return build_zero_cst (vect_elt_type
);
560 tree size
= TYPE_SIZE (vect_elt_type
);
561 tree pos
= fold_build2 (MULT_EXPR
, TREE_TYPE (idx
), idx
, size
);
562 return fold_build3 (BIT_FIELD_REF
, vect_elt_type
, vect
, size
, pos
);
567 tmpvec
= create_tmp_var (vect_type
, "vectmp");
569 tmpvec
= *ptmpvec
= create_tmp_var (vect_type
, "vectmp");
578 TREE_ADDRESSABLE (tmpvec
) = 1;
579 asgn
= gimple_build_assign (tmpvec
, vect
);
580 gsi_insert_before (gsi
, asgn
, GSI_SAME_STMT
);
583 arraytype
= build_array_type_nelts (vect_elt_type
, elements
);
584 return build4 (ARRAY_REF
, vect_elt_type
,
585 build1 (VIEW_CONVERT_EXPR
, arraytype
, tmpvec
),
586 idx
, NULL_TREE
, NULL_TREE
);
/* Check if VEC_SHUFFLE_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_SHUFFLE_EXPR has the same first and second operands:
   VEC_SHUFFLE_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_SHUFFLE_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of elements.  */
603 lower_vec_shuffle (gimple_stmt_iterator
*gsi
)
605 gimple stmt
= gsi_stmt (*gsi
);
606 tree mask
= gimple_assign_rhs3 (stmt
);
607 tree vec0
= gimple_assign_rhs1 (stmt
);
608 tree vec1
= gimple_assign_rhs2 (stmt
);
609 tree vect_type
= TREE_TYPE (vec0
);
610 tree mask_type
= TREE_TYPE (mask
);
611 tree vect_elt_type
= TREE_TYPE (vect_type
);
612 tree mask_elt_type
= TREE_TYPE (mask_type
);
613 unsigned int elements
= TYPE_VECTOR_SUBPARTS (vect_type
);
614 VEC(constructor_elt
,gc
) *v
;
615 tree constr
, t
, si
, i_val
;
616 tree vec0tmp
= NULL_TREE
, vec1tmp
= NULL_TREE
, masktmp
= NULL_TREE
;
617 bool two_operand_p
= !operand_equal_p (vec0
, vec1
, 0);
620 if (expand_vec_shuffle_expr_p (TYPE_MODE (vect_type
), vec0
, vec1
, mask
))
623 v
= VEC_alloc (constructor_elt
, gc
, elements
);
624 for (i
= 0; i
< elements
; i
++)
627 i_val
= vector_element (gsi
, mask
, si
, &masktmp
);
629 if (TREE_CODE (i_val
) == INTEGER_CST
)
631 unsigned HOST_WIDE_INT index
;
633 index
= TREE_INT_CST_LOW (i_val
);
634 if (!host_integerp (i_val
, 1) || index
>= elements
)
635 i_val
= build_int_cst (mask_elt_type
, index
& (elements
- 1));
637 if (two_operand_p
&& (index
& elements
) != 0)
638 t
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
640 t
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
642 t
= force_gimple_operand_gsi (gsi
, t
, true, NULL_TREE
,
643 true, GSI_SAME_STMT
);
647 tree cond
= NULL_TREE
, v0_val
;
651 cond
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
652 build_int_cst (mask_elt_type
, elements
));
653 cond
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
654 true, GSI_SAME_STMT
);
657 i_val
= fold_build2 (BIT_AND_EXPR
, mask_elt_type
, i_val
,
658 build_int_cst (mask_elt_type
, elements
- 1));
659 i_val
= force_gimple_operand_gsi (gsi
, i_val
, true, NULL_TREE
,
660 true, GSI_SAME_STMT
);
662 v0_val
= vector_element (gsi
, vec0
, i_val
, &vec0tmp
);
663 v0_val
= force_gimple_operand_gsi (gsi
, v0_val
, true, NULL_TREE
,
664 true, GSI_SAME_STMT
);
670 v1_val
= vector_element (gsi
, vec1
, i_val
, &vec1tmp
);
671 v1_val
= force_gimple_operand_gsi (gsi
, v1_val
, true, NULL_TREE
,
672 true, GSI_SAME_STMT
);
674 cond
= fold_build2 (EQ_EXPR
, boolean_type_node
,
675 cond
, build_zero_cst (mask_elt_type
));
676 cond
= fold_build3 (COND_EXPR
, vect_elt_type
,
677 cond
, v0_val
, v1_val
);
678 t
= force_gimple_operand_gsi (gsi
, cond
, true, NULL_TREE
,
679 true, GSI_SAME_STMT
);
685 CONSTRUCTOR_APPEND_ELT (v
, si
, t
);
688 constr
= build_constructor (vect_type
, v
);
689 gimple_assign_set_rhs_from_tree (gsi
, constr
);
690 update_stmt (gsi_stmt (*gsi
));
693 /* Process one statement. If we identify a vector operation, expand it. */
696 expand_vector_operations_1 (gimple_stmt_iterator
*gsi
)
698 gimple stmt
= gsi_stmt (*gsi
);
699 tree lhs
, rhs1
, rhs2
= NULL
, type
, compute_type
;
701 enum machine_mode compute_mode
;
703 enum gimple_rhs_class rhs_class
;
706 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
709 code
= gimple_assign_rhs_code (stmt
);
710 rhs_class
= get_gimple_rhs_class (code
);
711 lhs
= gimple_assign_lhs (stmt
);
713 if (code
== VEC_SHUFFLE_EXPR
)
715 lower_vec_shuffle (gsi
);
719 if (rhs_class
!= GIMPLE_UNARY_RHS
&& rhs_class
!= GIMPLE_BINARY_RHS
)
722 rhs1
= gimple_assign_rhs1 (stmt
);
723 type
= gimple_expr_type (stmt
);
724 if (rhs_class
== GIMPLE_BINARY_RHS
)
725 rhs2
= gimple_assign_rhs2 (stmt
);
727 if (TREE_CODE (type
) != VECTOR_TYPE
)
731 || code
== FLOAT_EXPR
732 || code
== FIX_TRUNC_EXPR
733 || code
== VIEW_CONVERT_EXPR
)
736 gcc_assert (code
!= CONVERT_EXPR
);
738 /* The signedness is determined from input argument. */
739 if (code
== VEC_UNPACK_FLOAT_HI_EXPR
740 || code
== VEC_UNPACK_FLOAT_LO_EXPR
)
741 type
= TREE_TYPE (rhs1
);
/* Choose between vector shift/rotate by vector and vector shift/rotate by
   scalar.  */
745 if (code
== LSHIFT_EXPR
746 || code
== RSHIFT_EXPR
747 || code
== LROTATE_EXPR
748 || code
== RROTATE_EXPR
)
750 bool vector_scalar_shift
;
751 op
= optab_for_tree_code (code
, type
, optab_scalar
);
753 /* Vector/Scalar shift is supported. */
754 vector_scalar_shift
= (op
&& (optab_handler (op
, TYPE_MODE (type
))
755 != CODE_FOR_nothing
));
/* If the 2nd argument is vector, we need a vector/vector shift,
   except when all the elements in the second vector are the same.  */
759 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2
))))
764 /* Check whether we have vector <op> {x,x,x,x} where x
765 could be a scalar variable or a constant. Transform
766 vector <op> {x,x,x,x} ==> vector <op> scalar. */
767 if (vector_scalar_shift
768 && ((TREE_CODE (rhs2
) == VECTOR_CST
769 && (first
= uniform_vector_p (rhs2
)) != NULL_TREE
)
770 || (TREE_CODE (rhs2
) == SSA_NAME
771 && (def_stmt
= SSA_NAME_DEF_STMT (rhs2
))
772 && gimple_assign_single_p (def_stmt
)
773 && (first
= uniform_vector_p
774 (gimple_assign_rhs1 (def_stmt
))) != NULL_TREE
)))
776 gimple_assign_set_rhs2 (stmt
, first
);
781 op
= optab_for_tree_code (code
, type
, optab_vector
);
784 /* Try for a vector/scalar shift, and if we don't have one, see if we
785 have a vector/vector shift */
786 else if (!vector_scalar_shift
)
788 op
= optab_for_tree_code (code
, type
, optab_vector
);
790 if (op
&& (optab_handler (op
, TYPE_MODE (type
))
791 != CODE_FOR_nothing
))
793 /* Transform vector <op> scalar => vector <op> {x,x,x,x}. */
794 int n_parts
= TYPE_VECTOR_SUBPARTS (type
);
795 int part_size
= tree_low_cst (TYPE_SIZE (TREE_TYPE (type
)), 1);
796 tree part_type
= lang_hooks
.types
.type_for_size (part_size
, 1);
797 tree vect_type
= build_vector_type (part_type
, n_parts
);
799 rhs2
= fold_convert (part_type
, rhs2
);
800 rhs2
= build_vector_from_val (vect_type
, rhs2
);
801 gimple_assign_set_rhs2 (stmt
, rhs2
);
807 op
= optab_for_tree_code (code
, type
, optab_default
);
/* For widening/narrowing vector operations, the relevant type is that of
   the arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
   calculated in the same way above.  */
812 if (code
== WIDEN_SUM_EXPR
813 || code
== VEC_WIDEN_MULT_HI_EXPR
814 || code
== VEC_WIDEN_MULT_LO_EXPR
815 || code
== VEC_UNPACK_HI_EXPR
816 || code
== VEC_UNPACK_LO_EXPR
817 || code
== VEC_PACK_TRUNC_EXPR
818 || code
== VEC_PACK_SAT_EXPR
819 || code
== VEC_PACK_FIX_TRUNC_EXPR
)
820 type
= TREE_TYPE (rhs1
);
822 /* Optabs will try converting a negation into a subtraction, so
823 look for it as well. TODO: negation of floating-point vectors
824 might be turned into an exclusive OR toggling the sign bit. */
826 && code
== NEGATE_EXPR
827 && INTEGRAL_TYPE_P (TREE_TYPE (type
)))
828 op
= optab_for_tree_code (MINUS_EXPR
, type
, optab_default
);
830 /* For very wide vectors, try using a smaller vector mode. */
832 if (TYPE_MODE (type
) == BLKmode
&& op
)
834 tree vector_compute_type
835 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type
)), op
,
836 TYPE_SATURATING (TREE_TYPE (type
)));
837 if (vector_compute_type
!= NULL_TREE
838 && (TYPE_VECTOR_SUBPARTS (vector_compute_type
)
839 < TYPE_VECTOR_SUBPARTS (compute_type
)))
840 compute_type
= vector_compute_type
;
843 /* If we are breaking a BLKmode vector into smaller pieces,
844 type_for_widest_vector_mode has already looked into the optab,
845 so skip these checks. */
846 if (compute_type
== type
)
848 compute_mode
= TYPE_MODE (compute_type
);
849 if ((GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_INT
850 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FLOAT
851 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_FRACT
852 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_UFRACT
853 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_ACCUM
854 || GET_MODE_CLASS (compute_mode
) == MODE_VECTOR_UACCUM
)
856 && optab_handler (op
, compute_mode
) != CODE_FOR_nothing
)
859 /* There is no operation in hardware, so fall back to scalars. */
860 compute_type
= TREE_TYPE (type
);
863 gcc_assert (code
!= VEC_LSHIFT_EXPR
&& code
!= VEC_RSHIFT_EXPR
);
864 new_rhs
= expand_vector_operation (gsi
, type
, compute_type
, stmt
, code
);
866 /* Leave expression untouched for later expansion. */
867 if (new_rhs
== NULL_TREE
)
870 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (new_rhs
)))
871 new_rhs
= gimplify_build1 (gsi
, VIEW_CONVERT_EXPR
, TREE_TYPE (lhs
),
874 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
875 way to do it is change expand_vector_operation and its callees to
876 return a tree_code, RHS1 and RHS2 instead of a tree. */
877 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
878 update_stmt (gsi_stmt (*gsi
));
881 /* Use this to lower vector operations introduced by the vectorizer,
882 if it may need the bit-twiddling tricks implemented in this file. */
885 gate_expand_vector_operations_ssa (void)
887 return optimize
== 0;
891 expand_vector_operations (void)
893 gimple_stmt_iterator gsi
;
895 bool cfg_changed
= false;
899 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
901 expand_vector_operations_1 (&gsi
);
/* ??? If we do not clean up EH then we will ICE in
   verification.  But in reality we have created wrong-code
   as we did not properly transition EH info and edges to
   the piecewise computations.  */
906 if (maybe_clean_eh_stmt (gsi_stmt (gsi
))
907 && gimple_purge_dead_eh_edges (bb
))
912 return cfg_changed
? TODO_cleanup_cfg
: 0;
915 struct gimple_opt_pass pass_lower_vector
=
919 "veclower", /* name */
920 gate_expand_vector_operations_ssa
, /* gate */
921 expand_vector_operations
, /* execute */
924 0, /* static_pass_number */
926 PROP_cfg
, /* properties_required */
927 0, /* properties_provided */
928 0, /* properties_destroyed */
929 0, /* todo_flags_start */
930 TODO_update_ssa
/* todo_flags_finish */
932 | TODO_verify_stmts
| TODO_verify_flow
937 struct gimple_opt_pass pass_lower_vector_ssa
=
941 "veclower2", /* name */
943 expand_vector_operations
, /* execute */
946 0, /* static_pass_number */
948 PROP_cfg
, /* properties_required */
949 0, /* properties_provided */
950 0, /* properties_destroyed */
951 0, /* todo_flags_start */
952 TODO_update_ssa
/* todo_flags_finish */
954 | TODO_verify_stmts
| TODO_verify_flow
959 #include "gt-tree-vect-generic.h"