Implement interleave via permutation.
gcc.git: gcc/tree-vect-generic.c
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
28 #include "gimple.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
31 #include "flags.h"
32 #include "ggc.h"
33 #include "diagnostic.h"
34
35 /* Need to include rtl.h, expr.h, etc. for optabs. */
36 #include "expr.h"
37 #include "optabs.h"
38
39
40 static void expand_vector_operations_1 (gimple_stmt_iterator *);
41
42
43 /* Build a constant of type TYPE, made of VALUE's bits replicated
44 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
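/* For example, replicating an 8-bit INNER_TYPE value of 0x7F into a
   32-bit TYPE yields 0x7F7F7F7F; this is how the masks used by the
   word-wise addition helpers below are built.  */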
45 static tree
46 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
47 {
48 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
49 int n = HOST_BITS_PER_WIDE_INT / width;
50 unsigned HOST_WIDE_INT low, high, mask;
51 tree ret;
52
53 gcc_assert (n);
54
55 if (width == HOST_BITS_PER_WIDE_INT)
56 low = value;
57 else
58 {
59 mask = ((HOST_WIDE_INT)1 << width) - 1;
60 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
61 }
62
63 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
64 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
65 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
66 high = 0;
67 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
68 high = low;
69 else
70 gcc_unreachable ();
71
72 ret = build_int_cst_wide (type, low, high);
73 return ret;
74 }
75
76 static GTY(()) tree vector_inner_type;
77 static GTY(()) tree vector_last_type;
78 static GTY(()) int vector_last_nunits;
79
80 /* Return a suitable vector type made of NUNITS units, each of mode
81 "word_mode" (the global variable). */
82 static tree
83 build_word_mode_vector_type (int nunits)
84 {
85 if (!vector_inner_type)
86 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
87 else if (vector_last_nunits == nunits)
88 {
89 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
90 return vector_last_type;
91 }
92
93 /* We build a new type, but we canonicalize it nevertheless,
94 because it still saves some memory. */
95 vector_last_nunits = nunits;
96 vector_last_type = type_hash_canon (nunits,
97 build_vector_type (vector_inner_type,
98 nunits));
99 return vector_last_type;
100 }
101
102 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
103 tree, tree, tree, tree, tree, enum tree_code);
104
105 static inline tree
106 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
107 tree t, tree bitsize, tree bitpos)
108 {
109 if (bitpos)
110 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
111 else
112 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
113 }
114
115 static tree
116 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
117 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
118 enum tree_code code)
119 {
120 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
121 return gimplify_build1 (gsi, code, inner_type, a);
122 }
123
124 static tree
125 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
126 tree bitpos, tree bitsize, enum tree_code code)
127 {
128 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
129 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
130 return gimplify_build2 (gsi, code, inner_type, a, b);
131 }
132
133 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
134
135 INNER_TYPE is the type of the elements of A and B.
136
137 The returned expression is of a signed integer type with the
138 same size as INNER_TYPE. */
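/* For example, comparing two four-element vectors of 32-bit floats with
   LT_EXPR produces four 32-bit signed integers, each -1 where the
   comparison holds and 0 where it does not.  */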
139 static tree
140 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
141 tree bitpos, tree bitsize, enum tree_code code)
142 {
143 tree comp_type;
144
145 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
146 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
147
148 comp_type = build_nonstandard_integer_type
149 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
150
151 return gimplify_build3 (gsi, COND_EXPR, comp_type,
152 fold_build2 (code, boolean_type_node, a, b),
153 build_int_cst (comp_type, -1),
154 build_int_cst (comp_type, 0));
155 }
156
157 /* Expand vector addition to scalars. This does bit twiddling
158 in order to increase parallelism:
159
160 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
161 (a ^ b) & 0x80808080
162
163 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
164 (a ^ ~b) & 0x80808080
165
166 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
167
168 This optimization should be done only if 4 vector items or more
169 fit into a word. */
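/* As a concrete illustration with 8-bit elements packed two per 16-bit
   word: for a = 0xFF00 (elements 0xFF, 0x00) and b = 0x0105 (elements
   0x01, 0x05), (a & 0x7F7F) + (b & 0x7F7F) = 0x7F00 + 0x0105 = 0x8005
   and (a ^ b) & 0x8080 = 0x8000, so a + b = 0x8005 ^ 0x8000 = 0x0005,
   i.e. the elements 0x00 and 0x05.  Masking off the top bit of each
   element before the wide addition keeps carries from leaking into the
   neighbouring element; the final XOR restores the top bits.  */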
170 static tree
171 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
172 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
173 enum tree_code code)
174 {
175 tree inner_type = TREE_TYPE (TREE_TYPE (a));
176 unsigned HOST_WIDE_INT max;
177 tree low_bits, high_bits, a_low, b_low, result_low, signs;
178
179 max = GET_MODE_MASK (TYPE_MODE (inner_type));
180 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
181 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
182
183 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
184 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
185
186 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
187 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
188 if (code == PLUS_EXPR)
189 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
190 else
191 {
192 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
193 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
194 }
195
196 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
197 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
198 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
199 }
200
201 static tree
202 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
203 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
204 tree bitsize ATTRIBUTE_UNUSED,
205 enum tree_code code ATTRIBUTE_UNUSED)
206 {
207 tree inner_type = TREE_TYPE (TREE_TYPE (b));
208 HOST_WIDE_INT max;
209 tree low_bits, high_bits, b_low, result_low, signs;
210
211 max = GET_MODE_MASK (TYPE_MODE (inner_type));
212 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
213 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
214
215 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
216
217 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
218 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
219 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
220 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
221 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
222 }
223
224 /* Expand a vector operation to scalars by using many operations
225 whose type is the vector type's inner type. */
226 static tree
227 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
228 tree type, tree inner_type,
229 tree a, tree b, enum tree_code code)
230 {
231 VEC(constructor_elt,gc) *v;
232 tree part_width = TYPE_SIZE (inner_type);
233 tree index = bitsize_int (0);
234 int nunits = TYPE_VECTOR_SUBPARTS (type);
235 int delta = tree_low_cst (part_width, 1)
236 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
237 int i;
238 location_t loc = gimple_location (gsi_stmt (*gsi));
239
240 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
241 warning_at (loc, OPT_Wvector_operation_performance,
242 "vector operation will be expanded piecewise");
243 else
244 warning_at (loc, OPT_Wvector_operation_performance,
245 "vector operation will be expanded in parallel");
246
247 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
248 for (i = 0; i < nunits;
249 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
250 {
251 tree result = f (gsi, inner_type, a, b, index, part_width, code);
252 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
253 ce->index = NULL_TREE;
254 ce->value = result;
255 }
256
257 return build_constructor (type, v);
258 }
259
260 /* Expand a vector operation to scalars with the freedom to use
261 a scalar integer type, or to use a different size for the items
262 in the vector type. */
263 static tree
264 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
265 tree a, tree b,
266 enum tree_code code)
267 {
268 tree result, compute_type;
269 enum machine_mode mode;
270 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
271 location_t loc = gimple_location (gsi_stmt (*gsi));
272
273 /* We have three strategies. If the type is already correct, just do
274 the operation an element at a time. Else, if the vector is wider than
275 one word, do it a word at a time; finally, if the vector is smaller
276 than one word, do it as a scalar. */
277 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
278 return expand_vector_piecewise (gsi, f,
279 type, TREE_TYPE (type),
280 a, b, code);
281 else if (n_words > 1)
282 {
283 tree word_type = build_word_mode_vector_type (n_words);
284 result = expand_vector_piecewise (gsi, f,
285 word_type, TREE_TYPE (word_type),
286 a, b, code);
287 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
288 GSI_SAME_STMT);
289 }
290 else
291 {
292 /* Use a single scalar operation with a mode no wider than word_mode. */
293 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
294 compute_type = lang_hooks.types.type_for_mode (mode, 1);
295 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
296 warning_at (loc, OPT_Wvector_operation_performance,
297 "vector operation will be expanded with a "
298 "single scalar operation");
299 }
300
301 return result;
302 }
303
304 /* Expand a vector operation to scalars; for integer types we can use
305 special bit twiddling tricks to do the sums a word at a time, using
306 function F_PARALLEL instead of F. These tricks are done only if
307 they can process at least four items, that is, only if the vector
308 holds at least four items and if a word can hold four items. */
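/* For instance, on a 32-bit target a V8QI addition has four 8-bit parts
   per word and at least four subparts, so it is expanded with the
   word-wise bit-twiddling helpers; a V2SI addition falls back to the
   element-by-element expansion.  */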
309 static tree
310 expand_vector_addition (gimple_stmt_iterator *gsi,
311 elem_op_func f, elem_op_func f_parallel,
312 tree type, tree a, tree b, enum tree_code code)
313 {
314 int parts_per_word = UNITS_PER_WORD
315 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
316
317 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
318 && parts_per_word >= 4
319 && TYPE_VECTOR_SUBPARTS (type) >= 4)
320 return expand_vector_parallel (gsi, f_parallel,
321 type, a, b, code);
322 else
323 return expand_vector_piecewise (gsi, f,
324 type, TREE_TYPE (type),
325 a, b, code);
326 }
327
328 /* Check whether vector VEC consists of all equal elements and whether
329 the number of elements corresponds to the type of VEC. The function
330 returns the first element of the vector,
331 or NULL_TREE if the vector is not uniform. */
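/* For example, the constant vector {4, 4, 4, 4} yields 4, whereas
   {4, 4, 4, 1} or a CONSTRUCTOR with fewer elements than the vector
   type yields NULL_TREE.  */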
332 static tree
333 uniform_vector_p (tree vec)
334 {
335 tree first, t, els;
336 unsigned i;
337
338 if (vec == NULL_TREE)
339 return NULL_TREE;
340
341 if (TREE_CODE (vec) == VECTOR_CST)
342 {
343 els = TREE_VECTOR_CST_ELTS (vec);
344 first = TREE_VALUE (els);
345 els = TREE_CHAIN (els);
346
347 for (t = els; t; t = TREE_CHAIN (t))
348 if (!operand_equal_p (first, TREE_VALUE (t), 0))
349 return NULL_TREE;
350
351 return first;
352 }
353
354 else if (TREE_CODE (vec) == CONSTRUCTOR)
355 {
356 first = error_mark_node;
357
358 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
359 {
360 if (i == 0)
361 {
362 first = t;
363 continue;
364 }
365 if (!operand_equal_p (first, t, 0))
366 return NULL_TREE;
367 }
368 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
369 return NULL_TREE;
370
371 return first;
372 }
373
374 return NULL_TREE;
375 }
376
377 /* Try to expand the vector comparison expression OP0 CODE OP1 by
378 querying the optab to check whether the expression
379 VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
380 can be expanded. */
381 static tree
382 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
383 tree op1, enum tree_code code)
384 {
385 tree t;
386 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
387 t = expand_vector_piecewise (gsi, do_compare, type,
388 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
389 else
390 t = NULL_TREE;
391
392 return t;
393 }
394
395 static tree
396 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
397 gimple assign, enum tree_code code)
398 {
399 enum machine_mode compute_mode = TYPE_MODE (compute_type);
400
401 /* If the compute mode is not a vector mode (hence we are not decomposing
402 a BLKmode vector to smaller, hardware-supported vectors), we may want
403 to expand the operations in parallel. */
404 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
405 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
406 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
407 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
408 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
409 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
410 switch (code)
411 {
412 case PLUS_EXPR:
413 case MINUS_EXPR:
414 if (!TYPE_OVERFLOW_TRAPS (type))
415 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
416 gimple_assign_rhs1 (assign),
417 gimple_assign_rhs2 (assign), code);
418 break;
419
420 case NEGATE_EXPR:
421 if (!TYPE_OVERFLOW_TRAPS (type))
422 return expand_vector_addition (gsi, do_unop, do_negate, type,
423 gimple_assign_rhs1 (assign),
424 NULL_TREE, code);
425 break;
426
427 case BIT_AND_EXPR:
428 case BIT_IOR_EXPR:
429 case BIT_XOR_EXPR:
430 return expand_vector_parallel (gsi, do_binop, type,
431 gimple_assign_rhs1 (assign),
432 gimple_assign_rhs2 (assign), code);
433
434 case BIT_NOT_EXPR:
435 return expand_vector_parallel (gsi, do_unop, type,
436 gimple_assign_rhs1 (assign),
437 NULL_TREE, code);
438 case EQ_EXPR:
439 case NE_EXPR:
440 case GT_EXPR:
441 case LT_EXPR:
442 case GE_EXPR:
443 case LE_EXPR:
444 case UNEQ_EXPR:
445 case UNGT_EXPR:
446 case UNLT_EXPR:
447 case UNGE_EXPR:
448 case UNLE_EXPR:
449 case LTGT_EXPR:
450 case ORDERED_EXPR:
451 case UNORDERED_EXPR:
452 {
453 tree rhs1 = gimple_assign_rhs1 (assign);
454 tree rhs2 = gimple_assign_rhs2 (assign);
455
456 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
457 }
458 default:
459 break;
460 }
461
462 if (TREE_CODE_CLASS (code) == tcc_unary)
463 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
464 gimple_assign_rhs1 (assign),
465 NULL_TREE, code);
466 else
467 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
468 gimple_assign_rhs1 (assign),
469 gimple_assign_rhs2 (assign), code);
470 }
471 \f
472 /* Return a type for the widest vector mode whose components are of mode
473 INNER_MODE and for which optab OP has a handler, or NULL_TREE if
474 none is found. SATP is true for saturating fixed-point types. */
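/* For example, if INNER_MODE is SImode and OP is the addition optab on
   a (hypothetical) target whose add handlers cover both V4SImode and
   V8SImode, the V8SImode variant is chosen and its type returned.  */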
475
476 static tree
477 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
478 {
479 enum machine_mode best_mode = VOIDmode, mode;
480 int best_nunits = 0;
481
482 if (SCALAR_FLOAT_MODE_P (inner_mode))
483 mode = MIN_MODE_VECTOR_FLOAT;
484 else if (SCALAR_FRACT_MODE_P (inner_mode))
485 mode = MIN_MODE_VECTOR_FRACT;
486 else if (SCALAR_UFRACT_MODE_P (inner_mode))
487 mode = MIN_MODE_VECTOR_UFRACT;
488 else if (SCALAR_ACCUM_MODE_P (inner_mode))
489 mode = MIN_MODE_VECTOR_ACCUM;
490 else if (SCALAR_UACCUM_MODE_P (inner_mode))
491 mode = MIN_MODE_VECTOR_UACCUM;
492 else
493 mode = MIN_MODE_VECTOR_INT;
494
495 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
496 if (GET_MODE_INNER (mode) == inner_mode
497 && GET_MODE_NUNITS (mode) > best_nunits
498 && optab_handler (op, mode) != CODE_FOR_nothing)
499 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
500
501 if (best_mode == VOIDmode)
502 return NULL_TREE;
503 else
504 {
505 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */
506 if (ALL_FIXED_POINT_MODE_P (best_mode))
507 return lang_hooks.types.type_for_mode (best_mode, satp);
508
509 return lang_hooks.types.type_for_mode (best_mode, 1);
510 }
511 }
512
513
514 /* Build a reference to the element of the vector VECT. The function
515 returns either the element itself, a BIT_FIELD_REF, or an
516 ARRAY_REF expression.
517
518 GSI is required to insert temporary variables while building a
519 reference to the element of the vector VECT.
520
521 PTMPVEC is a pointer to a temporary variable used for caching
522 purposes. If PTMPVEC is NULL, a new temporary variable
523 will be created. */
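/* For example, extracting element 2 from a constant vector simply
   returns the matching constant, a constant index into a non-constant
   vector becomes a BIT_FIELD_REF, and a variable index forces the
   vector into a temporary and returns an ARRAY_REF into it.  */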
524 static tree
525 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
526 {
527 tree vect_type, vect_elt_type;
528 gimple asgn;
529 tree tmpvec;
530 tree arraytype;
531 bool need_asgn = true;
532 unsigned int elements;
533
534 vect_type = TREE_TYPE (vect);
535 vect_elt_type = TREE_TYPE (vect_type);
536 elements = TYPE_VECTOR_SUBPARTS (vect_type);
537
538 if (TREE_CODE (idx) == INTEGER_CST)
539 {
540 unsigned HOST_WIDE_INT index;
541
542 /* Given that we're about to compute a binary modulus,
543 we don't care about the high bits of the value. */
544 index = TREE_INT_CST_LOW (idx);
545 if (!host_integerp (idx, 1) || index >= elements)
546 {
547 index &= elements - 1;
548 idx = build_int_cst (TREE_TYPE (idx), index);
549 }
550
551 /* When lowering a vector statement sequence, do some easy
552 simplification by looking through intermediate vector results. */
553 if (TREE_CODE (vect) == SSA_NAME)
554 {
555 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
556 if (is_gimple_assign (def_stmt)
557 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
558 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
559 vect = gimple_assign_rhs1 (def_stmt);
560 }
561
562 if (TREE_CODE (vect) == VECTOR_CST)
563 {
564 unsigned i;
565 tree vals = TREE_VECTOR_CST_ELTS (vect);
566 for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
567 if (i == index)
568 return TREE_VALUE (vals);
569 return build_zero_cst (vect_elt_type);
570 }
571 else if (TREE_CODE (vect) == CONSTRUCTOR)
572 {
573 unsigned i;
574 tree elt_i, elt_v;
575
576 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
577 if (operand_equal_p (elt_i, idx, 0))
578 return elt_v;
579 return build_zero_cst (vect_elt_type);
580 }
581 else
582 {
583 tree size = TYPE_SIZE (vect_elt_type);
584 tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
585 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
586 }
587 }
588
589 if (!ptmpvec)
590 tmpvec = create_tmp_var (vect_type, "vectmp");
591 else if (!*ptmpvec)
592 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
593 else
594 {
595 tmpvec = *ptmpvec;
596 need_asgn = false;
597 }
598
599 if (need_asgn)
600 {
601 TREE_ADDRESSABLE (tmpvec) = 1;
602 asgn = gimple_build_assign (tmpvec, vect);
603 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
604 }
605
606 arraytype = build_array_type_nelts (vect_elt_type, elements);
607 return build4 (ARRAY_REF, vect_elt_type,
608 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
609 idx, NULL_TREE, NULL_TREE);
610 }
611
612 /* Check if VEC_PERM_EXPR within the given setting is supported
613 by hardware, or lower it piecewise.
614
615 When VEC_PERM_EXPR has the same first and second operands:
616 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
617 {v0[mask[0]], v0[mask[1]], ...}
618 MASK and V0 must have the same number of elements.
619
620 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
621 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}
622 V0 and V1 must have the same type. MASK, V0 and V1 must have the
623 same number of elements. */
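/* For example, with four-element vectors V0 = {a,b,c,d},
   V1 = {e,f,g,h} and MASK = {0,5,2,7}, the piecewise lowering below
   builds the constructor {v0[0], v1[1], v0[2], v1[3]}, i.e.
   {a, f, c, h}: mask values greater than or equal to the element count
   select from V1 at position mask[i] & (elements - 1).  */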
624
625 static void
626 lower_vec_perm (gimple_stmt_iterator *gsi)
627 {
628 gimple stmt = gsi_stmt (*gsi);
629 tree mask = gimple_assign_rhs3 (stmt);
630 tree vec0 = gimple_assign_rhs1 (stmt);
631 tree vec1 = gimple_assign_rhs2 (stmt);
632 tree vect_type = TREE_TYPE (vec0);
633 tree mask_type = TREE_TYPE (mask);
634 tree vect_elt_type = TREE_TYPE (vect_type);
635 tree mask_elt_type = TREE_TYPE (mask_type);
636 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
637 VEC(constructor_elt,gc) *v;
638 tree constr, t, si, i_val;
639 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
640 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
641 location_t loc = gimple_location (gsi_stmt (*gsi));
642 unsigned i;
643
644 if (TREE_CODE (mask) == VECTOR_CST)
645 {
646 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
647 tree vals = TREE_VECTOR_CST_ELTS (mask);
648
649 for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
650 sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals));
651
652 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
653 return;
654 }
655 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
656 return;
657
658 warning_at (loc, OPT_Wvector_operation_performance,
659 "vector shuffling operation will be expanded piecewise");
660
661 v = VEC_alloc (constructor_elt, gc, elements);
662 for (i = 0; i < elements; i++)
663 {
664 si = size_int (i);
665 i_val = vector_element (gsi, mask, si, &masktmp);
666
667 if (TREE_CODE (i_val) == INTEGER_CST)
668 {
669 unsigned HOST_WIDE_INT index;
670
671 index = TREE_INT_CST_LOW (i_val);
672 if (!host_integerp (i_val, 1) || index >= elements)
673 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
674
675 if (two_operand_p && (index & elements) != 0)
676 t = vector_element (gsi, vec1, i_val, &vec1tmp);
677 else
678 t = vector_element (gsi, vec0, i_val, &vec0tmp);
679
680 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
681 true, GSI_SAME_STMT);
682 }
683 else
684 {
685 tree cond = NULL_TREE, v0_val;
686
687 if (two_operand_p)
688 {
689 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
690 build_int_cst (mask_elt_type, elements));
691 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
692 true, GSI_SAME_STMT);
693 }
694
695 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
696 build_int_cst (mask_elt_type, elements - 1));
697 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
698 true, GSI_SAME_STMT);
699
700 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
701 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
702 true, GSI_SAME_STMT);
703
704 if (two_operand_p)
705 {
706 tree v1_val;
707
708 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
709 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
710 true, GSI_SAME_STMT);
711
712 cond = fold_build2 (EQ_EXPR, boolean_type_node,
713 cond, build_zero_cst (mask_elt_type));
714 cond = fold_build3 (COND_EXPR, vect_elt_type,
715 cond, v0_val, v1_val);
716 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
717 true, GSI_SAME_STMT);
718 }
719 else
720 t = v0_val;
721 }
722
723 CONSTRUCTOR_APPEND_ELT (v, si, t);
724 }
725
726 constr = build_constructor (vect_type, v);
727 gimple_assign_set_rhs_from_tree (gsi, constr);
728 update_stmt (gsi_stmt (*gsi));
729 }
730
731 /* Process one statement. If we identify a vector operation, expand it. */
732
733 static void
734 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
735 {
736 gimple stmt = gsi_stmt (*gsi);
737 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
738 enum tree_code code;
739 enum machine_mode compute_mode;
740 optab op = NULL;
741 enum gimple_rhs_class rhs_class;
742 tree new_rhs;
743
744 if (gimple_code (stmt) != GIMPLE_ASSIGN)
745 return;
746
747 code = gimple_assign_rhs_code (stmt);
748 rhs_class = get_gimple_rhs_class (code);
749 lhs = gimple_assign_lhs (stmt);
750
751 if (code == VEC_PERM_EXPR)
752 {
753 lower_vec_perm (gsi);
754 return;
755 }
756
757 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
758 return;
759
760 rhs1 = gimple_assign_rhs1 (stmt);
761 type = gimple_expr_type (stmt);
762 if (rhs_class == GIMPLE_BINARY_RHS)
763 rhs2 = gimple_assign_rhs2 (stmt);
764
765 if (TREE_CODE (type) != VECTOR_TYPE)
766 return;
767
768 if (code == NOP_EXPR
769 || code == FLOAT_EXPR
770 || code == FIX_TRUNC_EXPR
771 || code == VIEW_CONVERT_EXPR)
772 return;
773
774 /* These are only created by the vectorizer, after having queried
775 the target support. It's more than just looking at the optab,
776 and there's no need to do it again. */
777 if (code == VEC_INTERLEAVE_HIGH_EXPR
778 || code == VEC_INTERLEAVE_LOW_EXPR
779 || code == VEC_EXTRACT_EVEN_EXPR
780 || code == VEC_EXTRACT_ODD_EXPR)
781 return;
782
783 gcc_assert (code != CONVERT_EXPR);
784
785 /* The signedness is determined from the input argument. */
786 if (code == VEC_UNPACK_FLOAT_HI_EXPR
787 || code == VEC_UNPACK_FLOAT_LO_EXPR)
788 type = TREE_TYPE (rhs1);
789
790 /* Choose between a vector shift/rotate by vector and a vector
791 shift/rotate by scalar. */
792 if (code == LSHIFT_EXPR
793 || code == RSHIFT_EXPR
794 || code == LROTATE_EXPR
795 || code == RROTATE_EXPR)
796 {
797 /* Check whether we have vector <op> {x,x,x,x} where x
798 could be a scalar variable or a constant. Transform
799 vector <op> {x,x,x,x} ==> vector <op> scalar. */
800 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
801 {
802 tree first;
803 gimple def_stmt;
804
805 if ((TREE_CODE (rhs2) == VECTOR_CST
806 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
807 || (TREE_CODE (rhs2) == SSA_NAME
808 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
809 && gimple_assign_single_p (def_stmt)
810 && (first = uniform_vector_p
811 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
812 {
813 gimple_assign_set_rhs2 (stmt, first);
814 update_stmt (stmt);
815 rhs2 = first;
816 }
817 }
818
819 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
820 op = optab_for_tree_code (code, type, optab_vector);
821 else
822 {
823 op = optab_for_tree_code (code, type, optab_scalar);
824
825 /* The rtl expander will expand vector/scalar as vector/vector
826 if necessary. Don't bother converting the stmt here. */
827 if (op == NULL
828 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
829 op = optab_for_tree_code (code, type, optab_vector);
830 }
831 }
832 else
833 op = optab_for_tree_code (code, type, optab_default);
834
835 /* For widening/narrowing vector operations, the relevant type is that of
836 the arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
837 handled the same way above. */
838 if (code == WIDEN_SUM_EXPR
839 || code == VEC_WIDEN_MULT_HI_EXPR
840 || code == VEC_WIDEN_MULT_LO_EXPR
841 || code == VEC_UNPACK_HI_EXPR
842 || code == VEC_UNPACK_LO_EXPR
843 || code == VEC_PACK_TRUNC_EXPR
844 || code == VEC_PACK_SAT_EXPR
845 || code == VEC_PACK_FIX_TRUNC_EXPR
846 || code == VEC_WIDEN_LSHIFT_HI_EXPR
847 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
848 type = TREE_TYPE (rhs1);
849
850 /* Optabs will try converting a negation into a subtraction, so
851 look for it as well. TODO: negation of floating-point vectors
852 might be turned into an exclusive OR toggling the sign bit. */
853 if (op == NULL
854 && code == NEGATE_EXPR
855 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
856 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
857
858 /* For very wide vectors, try using a smaller vector mode. */
859 compute_type = type;
860 if (TYPE_MODE (type) == BLKmode && op)
861 {
862 tree vector_compute_type
863 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
864 TYPE_SATURATING (TREE_TYPE (type)));
865 if (vector_compute_type != NULL_TREE
866 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
867 < TYPE_VECTOR_SUBPARTS (compute_type)))
868 compute_type = vector_compute_type;
869 }
870
871 /* If we are breaking a BLKmode vector into smaller pieces,
872 type_for_widest_vector_mode has already looked into the optab,
873 so skip these checks. */
874 if (compute_type == type)
875 {
876 compute_mode = TYPE_MODE (compute_type);
877 if (VECTOR_MODE_P (compute_mode)
878 && op != NULL
879 && optab_handler (op, compute_mode) != CODE_FOR_nothing)
880 return;
881 else
882 /* There is no operation in hardware, so fall back to scalars. */
883 compute_type = TREE_TYPE (type);
884 }
885
886 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
887 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
888
889 /* Leave expression untouched for later expansion. */
890 if (new_rhs == NULL_TREE)
891 return;
892
893 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
894 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
895 new_rhs);
896
897 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
898 way to do that is to change expand_vector_operation and its callees
899 to return a tree_code, RHS1 and RHS2 instead of a tree. */
900 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
901 update_stmt (gsi_stmt (*gsi));
902 }
903 \f
904 /* Use this to lower vector operations introduced by the vectorizer,
905 if it may need the bit-twiddling tricks implemented in this file. */
906
907 static bool
908 gate_expand_vector_operations_ssa (void)
909 {
910 return optimize == 0;
911 }
912
913 static unsigned int
914 expand_vector_operations (void)
915 {
916 gimple_stmt_iterator gsi;
917 basic_block bb;
918 bool cfg_changed = false;
919
920 FOR_EACH_BB (bb)
921 {
922 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
923 {
924 expand_vector_operations_1 (&gsi);
925 /* ??? If we do not clean up EH then we will ICE in
926 verification. But in reality we have created wrong code,
927 as we did not properly transition EH info and edges to
928 the piecewise computations. */
929 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
930 && gimple_purge_dead_eh_edges (bb))
931 cfg_changed = true;
932 }
933 }
934
935 return cfg_changed ? TODO_cleanup_cfg : 0;
936 }
937
938 struct gimple_opt_pass pass_lower_vector =
939 {
940 {
941 GIMPLE_PASS,
942 "veclower", /* name */
943 gate_expand_vector_operations_ssa, /* gate */
944 expand_vector_operations, /* execute */
945 NULL, /* sub */
946 NULL, /* next */
947 0, /* static_pass_number */
948 TV_NONE, /* tv_id */
949 PROP_cfg, /* properties_required */
950 0, /* properties_provided */
951 0, /* properties_destroyed */
952 0, /* todo_flags_start */
953 TODO_update_ssa /* todo_flags_finish */
954 | TODO_verify_ssa
955 | TODO_verify_stmts | TODO_verify_flow
956 | TODO_cleanup_cfg
957 }
958 };
959
960 struct gimple_opt_pass pass_lower_vector_ssa =
961 {
962 {
963 GIMPLE_PASS,
964 "veclower2", /* name */
965 0, /* gate */
966 expand_vector_operations, /* execute */
967 NULL, /* sub */
968 NULL, /* next */
969 0, /* static_pass_number */
970 TV_NONE, /* tv_id */
971 PROP_cfg, /* properties_required */
972 0, /* properties_provided */
973 0, /* properties_destroyed */
974 0, /* todo_flags_start */
975 TODO_update_ssa /* todo_flags_finish */
976 | TODO_verify_ssa
977 | TODO_verify_stmts | TODO_verify_flow
978 | TODO_cleanup_cfg
979 }
980 };
981
982 #include "gt-tree-vect-generic.h"