gcc/tree-vect-generic.c
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
28 #include "gimple.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
31 #include "flags.h"
32 #include "ggc.h"
33 #include "diagnostic.h"
34
35 /* Need to include rtl.h, expr.h, etc. for optabs. */
36 #include "expr.h"
37 #include "optabs.h"
38
39
40 static void expand_vector_operations_1 (gimple_stmt_iterator *);
41
42
43 /* Build a constant of type TYPE, made of VALUE's bits replicated
44 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
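/* For instance, replicating the value 0x80 of an 8-bit INNER_TYPE into a
   32-bit TYPE produces the constant 0x80808080. */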
45 static tree
46 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
47 {
48 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
49 int n = HOST_BITS_PER_WIDE_INT / width;
50 unsigned HOST_WIDE_INT low, high, mask;
51 tree ret;
52
53 gcc_assert (n);
54
55 if (width == HOST_BITS_PER_WIDE_INT)
56 low = value;
57 else
58 {
59 mask = ((HOST_WIDE_INT)1 << width) - 1;
60 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
61 }
62
63 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
64 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
65 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
66 high = 0;
67 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
68 high = low;
69 else
70 gcc_unreachable ();
71
72 ret = build_int_cst_wide (type, low, high);
73 return ret;
74 }
75
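/* Cache used by build_word_mode_vector_type: the word_mode integer type
   used for the vector elements, plus the most recently built vector type
   together with its number of units. */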
76 static GTY(()) tree vector_inner_type;
77 static GTY(()) tree vector_last_type;
78 static GTY(()) int vector_last_nunits;
79
80 /* Return a suitable vector type made of NUNITS units, each of mode
81 "word_mode" (the global variable). */
82 static tree
83 build_word_mode_vector_type (int nunits)
84 {
85 if (!vector_inner_type)
86 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
87 else if (vector_last_nunits == nunits)
88 {
89 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
90 return vector_last_type;
91 }
92
93 /* We build a new type, but we canonicalize it nevertheless,
94 because it still saves some memory. */
95 vector_last_nunits = nunits;
96 vector_last_type = type_hash_canon (nunits,
97 build_vector_type (vector_inner_type,
98 nunits));
99 return vector_last_type;
100 }
101
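/* Type of the per-element expansion callbacks.  The arguments are the
   statement iterator, the type to compute in, the two operands, the bit
   position and bit size of the piece being processed, and the tree code
   of the operation. */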
102 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
103 tree, tree, tree, tree, tree, enum tree_code);
104
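/* Extract from vector T the sub-value of BITSIZE bits located at BITPOS,
   viewed as TYPE.  If BITPOS is NULL_TREE, reinterpret all of T as TYPE
   instead. */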
105 static inline tree
106 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
107 tree t, tree bitsize, tree bitpos)
108 {
109 if (bitpos)
110 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
111 else
112 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
113 }
114
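/* Expand one piece of a unary vector operation: extract the element of A
   described by BITPOS and BITSIZE and apply CODE to it.  B is unused. */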
115 static tree
116 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
117 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
118 enum tree_code code)
119 {
120 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
121 return gimplify_build1 (gsi, code, inner_type, a);
122 }
123
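/* Expand one piece of a binary vector operation: extract the elements of
   A and B described by BITPOS and BITSIZE (when the operands are vectors)
   and combine them with CODE. */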
124 static tree
125 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
126 tree bitpos, tree bitsize, enum tree_code code)
127 {
128 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
129 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
130 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
131 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
132 return gimplify_build2 (gsi, code, inner_type, a, b);
133 }
134
135 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
136
137 INNER_TYPE is the type of the elements of A and B.
138
139 The returned expression is of a signed integer type with the
140 same size as INNER_TYPE. */
141 static tree
142 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
143 tree bitpos, tree bitsize, enum tree_code code)
144 {
145 tree comp_type;
146
147 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
148 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
149
150 comp_type = build_nonstandard_integer_type
151 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
152
153 return gimplify_build3 (gsi, COND_EXPR, comp_type,
154 fold_build2 (code, boolean_type_node, a, b),
155 build_int_cst (comp_type, -1),
156 build_int_cst (comp_type, 0));
157 }
158
159 /* Expand vector addition to scalars. This does bit twiddling
160 in order to increase parallelism:
161
162 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
163 (a ^ b) & 0x80808080
164
165 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
166 (a ^ ~b) & 0x80808080
167
168 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
169
170 This optimization should be done only if 4 vector items or more
171 fit into a word. */
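/* For example, with 8-bit elements packed into a 32-bit word, adding 0x01
   and 0x7f in one lane yields 0x80: clearing the top bit of each element
   before the add keeps carries from spilling into the next element, and
   the final XOR with (a ^ b) & 0x80808080 restores the top bit of each
   sum, so neighbouring lanes are never disturbed. */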
172 static tree
173 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
174 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
175 enum tree_code code)
176 {
177 tree inner_type = TREE_TYPE (TREE_TYPE (a));
178 unsigned HOST_WIDE_INT max;
179 tree low_bits, high_bits, a_low, b_low, result_low, signs;
180
181 max = GET_MODE_MASK (TYPE_MODE (inner_type));
182 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
183 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
184
185 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
186 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
187
188 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
189 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
190 if (code == PLUS_EXPR)
191 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
192 else
193 {
194 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
195 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
196 }
197
198 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
199 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
200 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
201 }
202
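/* Expand vector negation a word at a time, using the last formula above:
   subtract B's low bits from the replicated sign-bit constant and then
   fix up the sign bits with those of ~B. */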
203 static tree
204 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
205 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
206 tree bitsize ATTRIBUTE_UNUSED,
207 enum tree_code code ATTRIBUTE_UNUSED)
208 {
209 tree inner_type = TREE_TYPE (TREE_TYPE (b));
210 HOST_WIDE_INT max;
211 tree low_bits, high_bits, b_low, result_low, signs;
212
213 max = GET_MODE_MASK (TYPE_MODE (inner_type));
214 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
215 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
216
217 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
218
219 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
220 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
221 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
222 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
223 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
224 }
225
226 /* Expand a vector operation to scalars, by using many operations
227 whose type is the vector type's inner type. */
228 static tree
229 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
230 tree type, tree inner_type,
231 tree a, tree b, enum tree_code code)
232 {
233 VEC(constructor_elt,gc) *v;
234 tree part_width = TYPE_SIZE (inner_type);
235 tree index = bitsize_int (0);
236 int nunits = TYPE_VECTOR_SUBPARTS (type);
237 int delta = tree_low_cst (part_width, 1)
238 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
239 int i;
240 location_t loc = gimple_location (gsi_stmt (*gsi));
241
242 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
243 warning_at (loc, OPT_Wvector_operation_performance,
244 "vector operation will be expanded piecewise");
245 else
246 warning_at (loc, OPT_Wvector_operation_performance,
247 "vector operation will be expanded in parallel");
248
249 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
250 for (i = 0; i < nunits;
251 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
252 {
253 tree result = f (gsi, inner_type, a, b, index, part_width, code);
254 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
255 ce->index = NULL_TREE;
256 ce->value = result;
257 }
258
259 return build_constructor (type, v);
260 }
261
262 /* Expand a vector operation to scalars with the freedom to use
263 a scalar integer type, or to use a different size for the items
264 in the vector type. */
265 static tree
266 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
267 tree a, tree b,
268 enum tree_code code)
269 {
270 tree result, compute_type;
271 enum machine_mode mode;
272 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
273 location_t loc = gimple_location (gsi_stmt (*gsi));
274
275 /* We have three strategies. If the type is already correct, just do
276 the operation an element at a time. Else, if the vector is wider than
277 one word, do it a word at a time; finally, if the vector is smaller
278 than one word, do it as a scalar. */
279 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
280 return expand_vector_piecewise (gsi, f,
281 type, TREE_TYPE (type),
282 a, b, code);
283 else if (n_words > 1)
284 {
285 tree word_type = build_word_mode_vector_type (n_words);
286 result = expand_vector_piecewise (gsi, f,
287 word_type, TREE_TYPE (word_type),
288 a, b, code);
289 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
290 GSI_SAME_STMT);
291 }
292 else
293 {
294 /* Use a single scalar operation with a mode no wider than word_mode. */
295 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
296 compute_type = lang_hooks.types.type_for_mode (mode, 1);
297 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
298 warning_at (loc, OPT_Wvector_operation_performance,
299 "vector operation will be expanded with a "
300 "single scalar operation");
301 }
302
303 return result;
304 }
305
306 /* Expand a vector operation to scalars; for integer types we can use
307 special bit twiddling tricks to do the sums a word at a time, using
308 function F_PARALLEL instead of F. These tricks are done only if
309 they can process at least four items, that is, only if the vector
310 holds at least four items and if a word can hold four items. */
311 static tree
312 expand_vector_addition (gimple_stmt_iterator *gsi,
313 elem_op_func f, elem_op_func f_parallel,
314 tree type, tree a, tree b, enum tree_code code)
315 {
316 int parts_per_word = UNITS_PER_WORD
317 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
318
319 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
320 && parts_per_word >= 4
321 && TYPE_VECTOR_SUBPARTS (type) >= 4)
322 return expand_vector_parallel (gsi, f_parallel,
323 type, a, b, code);
324 else
325 return expand_vector_piecewise (gsi, f,
326 type, TREE_TYPE (type),
327 a, b, code);
328 }
329
330 /* Check whether vector VEC consists of all equal elements, and
331 that the number of elements corresponds to the type of VEC.
332 The function returns the first element of the vector,
333 or NULL_TREE if the vector is not uniform. */
334 static tree
335 uniform_vector_p (tree vec)
336 {
337 tree first, t;
338 unsigned i;
339
340 if (vec == NULL_TREE)
341 return NULL_TREE;
342
343 if (TREE_CODE (vec) == VECTOR_CST)
344 {
345 first = VECTOR_CST_ELT (vec, 0);
346 for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
347 if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
348 return NULL_TREE;
349
350 return first;
351 }
352
353 else if (TREE_CODE (vec) == CONSTRUCTOR)
354 {
355 first = error_mark_node;
356
357 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
358 {
359 if (i == 0)
360 {
361 first = t;
362 continue;
363 }
364 if (!operand_equal_p (first, t, 0))
365 return NULL_TREE;
366 }
367 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
368 return NULL_TREE;
369
370 return first;
371 }
372
373 return NULL_TREE;
374 }
375
376 /* Try to expand the vector comparison expression OP0 CODE OP1 by
377 querying the optab to see whether the following expression:
378 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
379 can be expanded. */
380 static tree
381 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
382 tree op1, enum tree_code code)
383 {
384 tree t;
385 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
386 t = expand_vector_piecewise (gsi, do_compare, type,
387 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
388 else
389 t = NULL_TREE;
390
391 return t;
392 }
393
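/* Expand the vector operation CODE performed by statement ASSIGN, whose
   result has vector type TYPE, using operations on COMPUTE_TYPE.  Return
   the lowered right-hand side, or NULL_TREE if the statement should be
   left untouched for later expansion. */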
394 static tree
395 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
396 gimple assign, enum tree_code code)
397 {
398 enum machine_mode compute_mode = TYPE_MODE (compute_type);
399
400 /* If the compute mode is not a vector mode (hence we are not decomposing
401 a BLKmode vector to smaller, hardware-supported vectors), we may want
402 to expand the operations in parallel. */
403 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
404 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
405 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
406 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
407 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
408 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
409 switch (code)
410 {
411 case PLUS_EXPR:
412 case MINUS_EXPR:
413 if (!TYPE_OVERFLOW_TRAPS (type))
414 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
415 gimple_assign_rhs1 (assign),
416 gimple_assign_rhs2 (assign), code);
417 break;
418
419 case NEGATE_EXPR:
420 if (!TYPE_OVERFLOW_TRAPS (type))
421 return expand_vector_addition (gsi, do_unop, do_negate, type,
422 gimple_assign_rhs1 (assign),
423 NULL_TREE, code);
424 break;
425
426 case BIT_AND_EXPR:
427 case BIT_IOR_EXPR:
428 case BIT_XOR_EXPR:
429 return expand_vector_parallel (gsi, do_binop, type,
430 gimple_assign_rhs1 (assign),
431 gimple_assign_rhs2 (assign), code);
432
433 case BIT_NOT_EXPR:
434 return expand_vector_parallel (gsi, do_unop, type,
435 gimple_assign_rhs1 (assign),
436 NULL_TREE, code);
437 case EQ_EXPR:
438 case NE_EXPR:
439 case GT_EXPR:
440 case LT_EXPR:
441 case GE_EXPR:
442 case LE_EXPR:
443 case UNEQ_EXPR:
444 case UNGT_EXPR:
445 case UNLT_EXPR:
446 case UNGE_EXPR:
447 case UNLE_EXPR:
448 case LTGT_EXPR:
449 case ORDERED_EXPR:
450 case UNORDERED_EXPR:
451 {
452 tree rhs1 = gimple_assign_rhs1 (assign);
453 tree rhs2 = gimple_assign_rhs2 (assign);
454
455 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
456 }
457 default:
458 break;
459 }
460
461 if (TREE_CODE_CLASS (code) == tcc_unary)
462 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
463 gimple_assign_rhs1 (assign),
464 NULL_TREE, code);
465 else
466 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
467 gimple_assign_rhs1 (assign),
468 gimple_assign_rhs2 (assign), code);
469 }
470 \f
471 /* Return a type for the widest vector mode whose components are of type
472 TYPE and which is supported by optab OP, or NULL_TREE if none is found. */
473
474 static tree
475 type_for_widest_vector_mode (tree type, optab op)
476 {
477 enum machine_mode inner_mode = TYPE_MODE (type);
478 enum machine_mode best_mode = VOIDmode, mode;
479 int best_nunits = 0;
480
481 if (SCALAR_FLOAT_MODE_P (inner_mode))
482 mode = MIN_MODE_VECTOR_FLOAT;
483 else if (SCALAR_FRACT_MODE_P (inner_mode))
484 mode = MIN_MODE_VECTOR_FRACT;
485 else if (SCALAR_UFRACT_MODE_P (inner_mode))
486 mode = MIN_MODE_VECTOR_UFRACT;
487 else if (SCALAR_ACCUM_MODE_P (inner_mode))
488 mode = MIN_MODE_VECTOR_ACCUM;
489 else if (SCALAR_UACCUM_MODE_P (inner_mode))
490 mode = MIN_MODE_VECTOR_UACCUM;
491 else
492 mode = MIN_MODE_VECTOR_INT;
493
494 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
495 if (GET_MODE_INNER (mode) == inner_mode
496 && GET_MODE_NUNITS (mode) > best_nunits
497 && optab_handler (op, mode) != CODE_FOR_nothing)
498 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
499
500 if (best_mode == VOIDmode)
501 return NULL_TREE;
502 else
503 return build_vector_type_for_mode (type, best_mode);
504 }
505
506
507 /* Build a reference to the element of the vector VECT. The function
508 returns either the element itself, a BIT_FIELD_REF, or an
509 ARRAY_REF expression.
510
511 GSI is required to insert temporary variables while building a
512 reference to the element of the vector VECT.
513
514 PTMPVEC is a pointer to a temporary variable used for caching
515 purposes. If PTMPVEC is NULL, a new temporary variable
516 will be created. */
517 static tree
518 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
519 {
520 tree vect_type, vect_elt_type;
521 gimple asgn;
522 tree tmpvec;
523 tree arraytype;
524 bool need_asgn = true;
525 unsigned int elements;
526
527 vect_type = TREE_TYPE (vect);
528 vect_elt_type = TREE_TYPE (vect_type);
529 elements = TYPE_VECTOR_SUBPARTS (vect_type);
530
531 if (TREE_CODE (idx) == INTEGER_CST)
532 {
533 unsigned HOST_WIDE_INT index;
534
535 /* Given that we're about to compute a binary modulus,
536 we don't care about the high bits of the value. */
537 index = TREE_INT_CST_LOW (idx);
538 if (!host_integerp (idx, 1) || index >= elements)
539 {
540 index &= elements - 1;
541 idx = build_int_cst (TREE_TYPE (idx), index);
542 }
543
544 /* When lowering a vector statement sequence, do some easy
545 simplification by looking through intermediate vector results. */
546 if (TREE_CODE (vect) == SSA_NAME)
547 {
548 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
549 if (is_gimple_assign (def_stmt)
550 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
551 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
552 vect = gimple_assign_rhs1 (def_stmt);
553 }
554
555 if (TREE_CODE (vect) == VECTOR_CST)
556 return VECTOR_CST_ELT (vect, index);
557 else if (TREE_CODE (vect) == CONSTRUCTOR)
558 {
559 unsigned i;
560 tree elt_i, elt_v;
561
562 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
563 if (operand_equal_p (elt_i, idx, 0))
564 return elt_v;
565 return build_zero_cst (vect_elt_type);
566 }
567 else
568 {
569 tree size = TYPE_SIZE (vect_elt_type);
570 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
571 size);
572 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
573 }
574 }
575
576 if (!ptmpvec)
577 tmpvec = create_tmp_var (vect_type, "vectmp");
578 else if (!*ptmpvec)
579 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
580 else
581 {
582 tmpvec = *ptmpvec;
583 need_asgn = false;
584 }
585
586 if (need_asgn)
587 {
588 TREE_ADDRESSABLE (tmpvec) = 1;
589 asgn = gimple_build_assign (tmpvec, vect);
590 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
591 }
592
593 arraytype = build_array_type_nelts (vect_elt_type, elements);
594 return build4 (ARRAY_REF, vect_elt_type,
595 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
596 idx, NULL_TREE, NULL_TREE);
597 }
598
599 /* Check whether the VEC_PERM_EXPR within the given setting is supported
600 by hardware; otherwise lower it piecewise.
601
602 When VEC_PERM_EXPR has the same first and second operands:
603 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
604 {v0[mask[0]], v0[mask[1]], ...}
605 MASK and V0 must have the same number of elements.
606
607 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
608 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
609 V0 and V1 must have the same type. MASK, V0 and V1 must have the
610 same number of elements. */
611
612 static void
613 lower_vec_perm (gimple_stmt_iterator *gsi)
614 {
615 gimple stmt = gsi_stmt (*gsi);
616 tree mask = gimple_assign_rhs3 (stmt);
617 tree vec0 = gimple_assign_rhs1 (stmt);
618 tree vec1 = gimple_assign_rhs2 (stmt);
619 tree vect_type = TREE_TYPE (vec0);
620 tree mask_type = TREE_TYPE (mask);
621 tree vect_elt_type = TREE_TYPE (vect_type);
622 tree mask_elt_type = TREE_TYPE (mask_type);
623 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
624 VEC(constructor_elt,gc) *v;
625 tree constr, t, si, i_val;
626 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
627 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
628 location_t loc = gimple_location (gsi_stmt (*gsi));
629 unsigned i;
630
631 if (TREE_CODE (mask) == VECTOR_CST)
632 {
633 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
634
635 for (i = 0; i < elements; ++i)
636 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
637 & (2 * elements - 1));
638
639 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
640 return;
641 }
642 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
643 return;
644
645 warning_at (loc, OPT_Wvector_operation_performance,
646 "vector shuffling operation will be expanded piecewise");
647
648 v = VEC_alloc (constructor_elt, gc, elements);
649 for (i = 0; i < elements; i++)
650 {
651 si = size_int (i);
652 i_val = vector_element (gsi, mask, si, &masktmp);
653
654 if (TREE_CODE (i_val) == INTEGER_CST)
655 {
656 unsigned HOST_WIDE_INT index;
657
658 index = TREE_INT_CST_LOW (i_val);
659 if (!host_integerp (i_val, 1) || index >= elements)
660 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
661
662 if (two_operand_p && (index & elements) != 0)
663 t = vector_element (gsi, vec1, i_val, &vec1tmp);
664 else
665 t = vector_element (gsi, vec0, i_val, &vec0tmp);
666
667 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
668 true, GSI_SAME_STMT);
669 }
670 else
671 {
672 tree cond = NULL_TREE, v0_val;
673
674 if (two_operand_p)
675 {
676 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
677 build_int_cst (mask_elt_type, elements));
678 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
679 true, GSI_SAME_STMT);
680 }
681
682 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
683 build_int_cst (mask_elt_type, elements - 1));
684 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
685 true, GSI_SAME_STMT);
686
687 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
688 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
689 true, GSI_SAME_STMT);
690
691 if (two_operand_p)
692 {
693 tree v1_val;
694
695 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
696 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
697 true, GSI_SAME_STMT);
698
699 cond = fold_build2 (EQ_EXPR, boolean_type_node,
700 cond, build_zero_cst (mask_elt_type));
701 cond = fold_build3 (COND_EXPR, vect_elt_type,
702 cond, v0_val, v1_val);
703 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
704 true, GSI_SAME_STMT);
705 }
706 else
707 t = v0_val;
708 }
709
710 CONSTRUCTOR_APPEND_ELT (v, si, t);
711 }
712
713 constr = build_constructor (vect_type, v);
714 gimple_assign_set_rhs_from_tree (gsi, constr);
715 update_stmt (gsi_stmt (*gsi));
716 }
717
718 /* Process one statement. If we identify a vector operation, expand it. */
719
720 static void
721 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
722 {
723 gimple stmt = gsi_stmt (*gsi);
724 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
725 enum tree_code code;
726 enum machine_mode compute_mode;
727 optab op = NULL;
728 enum gimple_rhs_class rhs_class;
729 tree new_rhs;
730
731 if (gimple_code (stmt) != GIMPLE_ASSIGN)
732 return;
733
734 code = gimple_assign_rhs_code (stmt);
735 rhs_class = get_gimple_rhs_class (code);
736 lhs = gimple_assign_lhs (stmt);
737
738 if (code == VEC_PERM_EXPR)
739 {
740 lower_vec_perm (gsi);
741 return;
742 }
743
744 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
745 return;
746
747 rhs1 = gimple_assign_rhs1 (stmt);
748 type = gimple_expr_type (stmt);
749 if (rhs_class == GIMPLE_BINARY_RHS)
750 rhs2 = gimple_assign_rhs2 (stmt);
751
752 if (TREE_CODE (type) != VECTOR_TYPE)
753 return;
754
755 if (code == NOP_EXPR
756 || code == FLOAT_EXPR
757 || code == FIX_TRUNC_EXPR
758 || code == VIEW_CONVERT_EXPR)
759 return;
760
761 gcc_assert (code != CONVERT_EXPR);
762
763 /* The signedness is determined from the input argument. */
764 if (code == VEC_UNPACK_FLOAT_HI_EXPR
765 || code == VEC_UNPACK_FLOAT_LO_EXPR)
766 type = TREE_TYPE (rhs1);
767
768 /* Choose between vector shift/rotate by vector and vector shift/rotate by
769 scalar. */
770 if (code == LSHIFT_EXPR
771 || code == RSHIFT_EXPR
772 || code == LROTATE_EXPR
773 || code == RROTATE_EXPR)
774 {
775 optab opv;
776
777 /* Check whether we have vector <op> {x,x,x,x} where x
778 could be a scalar variable or a constant. Transform
779 vector <op> {x,x,x,x} ==> vector <op> scalar. */
780 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
781 {
782 tree first;
783 gimple def_stmt;
784
785 if ((TREE_CODE (rhs2) == VECTOR_CST
786 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
787 || (TREE_CODE (rhs2) == SSA_NAME
788 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
789 && gimple_assign_single_p (def_stmt)
790 && (first = uniform_vector_p
791 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
792 {
793 gimple_assign_set_rhs2 (stmt, first);
794 update_stmt (stmt);
795 rhs2 = first;
796 }
797 }
798
799 opv = optab_for_tree_code (code, type, optab_vector);
800 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
801 op = opv;
802 else
803 {
804 op = optab_for_tree_code (code, type, optab_scalar);
805
806 /* The rtl expander will expand vector/scalar as vector/vector
807 if necessary. Don't bother converting the stmt here. */
808 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
809 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
810 return;
811 }
812 }
813 else
814 op = optab_for_tree_code (code, type, optab_default);
815
816 /* For widening/narrowing vector operations, the relevant type is that of
817 the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR is
818 calculated in the same way as above. */
819 if (code == WIDEN_SUM_EXPR
820 || code == VEC_WIDEN_MULT_HI_EXPR
821 || code == VEC_WIDEN_MULT_LO_EXPR
822 || code == VEC_UNPACK_HI_EXPR
823 || code == VEC_UNPACK_LO_EXPR
824 || code == VEC_PACK_TRUNC_EXPR
825 || code == VEC_PACK_SAT_EXPR
826 || code == VEC_PACK_FIX_TRUNC_EXPR
827 || code == VEC_WIDEN_LSHIFT_HI_EXPR
828 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
829 type = TREE_TYPE (rhs1);
830
831 /* Optabs will try converting a negation into a subtraction, so
832 look for it as well. TODO: negation of floating-point vectors
833 might be turned into an exclusive OR toggling the sign bit. */
834 if (op == NULL
835 && code == NEGATE_EXPR
836 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
837 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
838
839 /* For very wide vectors, try using a smaller vector mode. */
840 compute_type = type;
841 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
842 {
843 tree vector_compute_type
844 = type_for_widest_vector_mode (TREE_TYPE (type), op);
845 if (vector_compute_type != NULL_TREE
846 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
847 < TYPE_VECTOR_SUBPARTS (compute_type))
848 && (optab_handler (op, TYPE_MODE (vector_compute_type))
849 != CODE_FOR_nothing))
850 compute_type = vector_compute_type;
851 }
852
853 /* If we are breaking a BLKmode vector into smaller pieces,
854 type_for_widest_vector_mode has already looked into the optab,
855 so skip these checks. */
856 if (compute_type == type)
857 {
858 compute_mode = TYPE_MODE (compute_type);
859 if (VECTOR_MODE_P (compute_mode)
860 && op != NULL
861 && optab_handler (op, compute_mode) != CODE_FOR_nothing)
862 return;
863 else
864 /* There is no operation in hardware, so fall back to scalars. */
865 compute_type = TREE_TYPE (type);
866 }
867
868 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
869 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
870
871 /* Leave expression untouched for later expansion. */
872 if (new_rhs == NULL_TREE)
873 return;
874
875 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
876 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
877 new_rhs);
878
879 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
880 way to do it is to change expand_vector_operation and its callees to
881 return a tree_code, RHS1 and RHS2 instead of a tree. */
882 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
883 update_stmt (gsi_stmt (*gsi));
884 }
885 \f
886 /* Use this to lower vector operations introduced by the vectorizer,
887 if it may need the bit-twiddling tricks implemented in this file. */
888
889 static bool
890 gate_expand_vector_operations_ssa (void)
891 {
892 return optimize == 0;
893 }
894
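/* Main pass routine: walk every statement of every basic block and expand
   the vector operations that need lowering.  Return TODO_cleanup_cfg if
   purging dead EH edges changed the CFG, zero otherwise. */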
895 static unsigned int
896 expand_vector_operations (void)
897 {
898 gimple_stmt_iterator gsi;
899 basic_block bb;
900 bool cfg_changed = false;
901
902 FOR_EACH_BB (bb)
903 {
904 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
905 {
906 expand_vector_operations_1 (&gsi);
907 /* ??? If we do not clean up EH then we will ICE in
908 verification. But in reality we have created wrong code,
909 as we did not properly transition EH info and edges to
910 the piecewise computations. */
911 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
912 && gimple_purge_dead_eh_edges (bb))
913 cfg_changed = true;
914 }
915 }
916
917 return cfg_changed ? TODO_cleanup_cfg : 0;
918 }
919
920 struct gimple_opt_pass pass_lower_vector =
921 {
922 {
923 GIMPLE_PASS,
924 "veclower", /* name */
925 gate_expand_vector_operations_ssa, /* gate */
926 expand_vector_operations, /* execute */
927 NULL, /* sub */
928 NULL, /* next */
929 0, /* static_pass_number */
930 TV_NONE, /* tv_id */
931 PROP_cfg, /* properties_required */
932 0, /* properties_provided */
933 0, /* properties_destroyed */
934 0, /* todo_flags_start */
935 TODO_update_ssa /* todo_flags_finish */
936 | TODO_verify_ssa
937 | TODO_verify_stmts | TODO_verify_flow
938 | TODO_cleanup_cfg
939 }
940 };
941
942 struct gimple_opt_pass pass_lower_vector_ssa =
943 {
944 {
945 GIMPLE_PASS,
946 "veclower2", /* name */
947 0, /* gate */
948 expand_vector_operations, /* execute */
949 NULL, /* sub */
950 NULL, /* next */
951 0, /* static_pass_number */
952 TV_NONE, /* tv_id */
953 PROP_cfg, /* properties_required */
954 0, /* properties_provided */
955 0, /* properties_destroyed */
956 0, /* todo_flags_start */
957 TODO_update_ssa /* todo_flags_finish */
958 | TODO_verify_ssa
959 | TODO_verify_stmts | TODO_verify_flow
960 | TODO_cleanup_cfg
961 }
962 };
963
964 #include "gt-tree-vect-generic.h"