/* Lower vector operations to scalar operations.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
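
/* A worked example of the replication above (added for illustration, not
   part of the original sources): with an 8-bit INNER_TYPE, a 32-bit TYPE
   and a 64-bit HOST_WIDE_INT, VALUE == 0x7f gives

     mask = (1 << 8) - 1                    = 0xff
     low  = (~0ULL / 0xff) * (0x7f & 0xff)  = 0x0101010101010101 * 0x7f
                                            = 0x7f7f7f7f7f7f7f7f
     low &= (1 << 32) - 1                   = 0x7f7f7f7f

   which is precisely the low-bits mask that do_plus_minus and do_negate
   below request for 8-bit elements.  */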

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is of signed integer type with size
   equal to the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
            tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
                (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
                          fold_build2 (code, boolean_type_node, a, b),
                          build_int_cst (comp_type, -1),
                          build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
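
/* For illustration (not part of the original file): tracing the PLUS_EXPR
   identity above on one 8-bit element of a 32-bit word, with a = 200
   (0xc8) and b = 100 (0x64) in that element:

     a_low  = 0xc8 & 0x7f           = 0x48
     b_low  = 0x64 & 0x7f           = 0x64
     a_low + b_low                  = 0xac   (no carry past bit 7)
     signs  = (0xc8 ^ 0x64) & 0x80  = 0x80
     result = 0xac ^ 0x80           = 0x2c   (= 44 = 300 mod 256)

   Masking bit 7 of each element before the add keeps carries from leaking
   into the neighbouring element; the XOR then patches the sign bit back.  */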

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
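
/* Again for illustration only: negating the 8-bit element b = 0x01 via
   the -b identity above gives

     result_low = 0x80 - (0x01 & 0x7f) = 0x7f
     signs      = ~0x01 & 0x80         = 0x80
     result     = 0x7f ^ 0x80          = 0xff  (= -1, as expected)

   and every element of the word is negated in the same few scalar ops.  */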

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  VEC(constructor_elt,gc) *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
      ce->index = NULL_TREE;
      ce->value = result;
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  return result;
}
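
/* To make the three strategies concrete (an added illustration assuming a
   32-bit target): a vector of two 32-bit integers already has word_mode
   elements, so it is processed element by element; a 16-byte vector gives
   n_words == 4 and is processed as a vector of four words; a vector of
   four chars fits in one word and is handled by a single SImode scalar
   operation.  */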

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}
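
/* Illustration (added): assuming a 64-bit word, adding two V8QI vectors
   has parts_per_word == 8 and eight subparts, so it takes the parallel
   bit-twiddling path above; adding two V4SI vectors has
   parts_per_word == 2, so it is simply decomposed into four scalar
   additions.  */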

/* Check whether vector VEC consists of all equal elements and that
   their number corresponds to the type of VEC.  Return the first
   element of the vector, or NULL_TREE if the vector is not uniform.  */
static tree
uniform_vector_p (tree vec)
{
  tree first, t, els;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      els = TREE_VECTOR_CST_ELTS (vec);
      first = TREE_VALUE (els);
      els = TREE_CHAIN (els);

      for (t = els; t; t = TREE_CHAIN (t))
        if (!operand_equal_p (first, TREE_VALUE (t), 0))
          return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
          if (!operand_equal_p (first, t, 0))
            return NULL_TREE;
        }
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
        return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}
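
/* For example (illustration only): uniform_vector_p returns the element 4
   for the V4SI constant {4,4,4,4}, and NULL_TREE both for {4,4,4,8} and
   for a two-element CONSTRUCTOR of a four-element vector type, where the
   final count check fails.  */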

/* Try to expand the vector comparison expression OP0 CODE OP1 by querying
   the optab to see whether the expression
     VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded; if it cannot, expand the comparison piecewise.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);

          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
        }
      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}
\f
/* Return a type for the widest vector mode whose components are of mode
   INNER_MODE, or NULL_TREE if none is found.
   SATP is true for saturating fixed-point types.  */

static tree
type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
{
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    {
      /* For fixed-point modes, we need to pass satp as the 2nd parameter.  */
      if (ALL_FIXED_POINT_MODE_P (best_mode))
        return lang_hooks.types.type_for_mode (best_mode, satp);

      return lang_hooks.types.type_for_mode (best_mode, 1);
    }
}

/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  In case when PTMPVEC is NULL a new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
         we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
        {
          index &= elements - 1;
          idx = build_int_cst (TREE_TYPE (idx), index);
        }

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (vect);
          if (is_gimple_assign (def_stmt)
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
            vect = gimple_assign_rhs1 (def_stmt);
        }

      if (TREE_CODE (vect) == VECTOR_CST)
        {
          unsigned i;
          tree vals = TREE_VECTOR_CST_ELTS (vect);
          for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
            if (i == index)
              return TREE_VALUE (vals);
          return build_zero_cst (vect_elt_type);
        }
      else if (TREE_CODE (vect) == CONSTRUCTOR)
        {
          unsigned i;
          tree elt_i, elt_v;

          FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
            if (operand_equal_p (elt_i, idx, 0))
              return elt_v;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
          tree size = TYPE_SIZE (vect_elt_type);
          tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
          return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}
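
/* By way of example (not from the original sources): asking for element 2
   of the V4SI constant {10,20,30,40} folds directly to 30, and element 6
   is first reduced modulo the element count to 2, yielding 30 as well.
   For a variable index the vector is spilled to an addressable temporary
   and the result is an ARRAY_REF into a VIEW_CONVERT_EXPR of that
   temporary, i.e. roughly VIEW_CONVERT_EXPR<int[4]>(vectmp)[idx].  */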

/* Check whether the VEC_SHUFFLE_EXPR in the given setting is supported
   by the hardware; if not, lower it piecewise.

   When VEC_SHUFFLE_EXPR has the same first and second operands,
   VEC_SHUFFLE_EXPR <v0, v0, mask>, the lowered version is
   {v0[mask[0]], v0[mask[1]], ...}.
   MASK and V0 must have the same number of elements.

   Otherwise VEC_SHUFFLE_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}.
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */

static void
lower_vec_shuffle (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  VEC(constructor_elt,gc) *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  unsigned i;

  if (expand_vec_shuffle_expr_p (TYPE_MODE (vect_type), vec0, vec1, mask))
    return;

  v = VEC_alloc (constructor_elt, gc, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
          unsigned HOST_WIDE_INT index;

          index = TREE_INT_CST_LOW (i_val);
          if (!host_integerp (i_val, 1) || index >= elements)
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
        }
      else
        {
          tree cond = NULL_TREE, v0_val;

          if (two_operand_p)
            {
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                                  build_int_cst (mask_elt_type, elements));
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
            }

          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                               build_int_cst (mask_elt_type, elements - 1));
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

          if (two_operand_p)
            {
              tree v1_val;

              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
                                                 true, GSI_SAME_STMT);

              cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                  cond, build_zero_cst (mask_elt_type));
              cond = fold_build3 (COND_EXPR, vect_elt_type,
                                  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                            true, GSI_SAME_STMT);
            }
          else
            t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, si, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
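
/* A small worked example (illustration only): with four-element vectors
   v0 = {1,2,3,4}, v1 = {5,6,7,8} and the constant mask {0,5,2,7},
   elements 0 and 2 select v0 directly, while mask values 5 and 7 have
   the operand bit (bit 2) set and select v1[5 & 3] and v1[7 & 3], so the
   lowered constructor is {1,6,3,8}.  For a non-constant mask, the
   COND_EXPR path above performs the same selection at run time.  */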

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = NULL;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_SHUFFLE_EXPR)
    {
      lower_vec_shuffle (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      bool vector_scalar_shift;
      op = optab_for_tree_code (code, type, optab_scalar);

      /* Vector/Scalar shift is supported.  */
      vector_scalar_shift = (op && (optab_handler (op, TYPE_MODE (type))
                                    != CODE_FOR_nothing));
      /* If the 2nd argument is a vector, we need a vector/vector shift,
         unless all the elements in the second vector are the same.  */
      if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
        {
          tree first;
          gimple def_stmt;

          /* Check whether we have vector <op> {x,x,x,x} where x
             could be a scalar variable or a constant.  Transform
             vector <op> {x,x,x,x} ==> vector <op> scalar.  */
          if (vector_scalar_shift
              && ((TREE_CODE (rhs2) == VECTOR_CST
                   && (first = uniform_vector_p (rhs2)) != NULL_TREE)
                  || (TREE_CODE (rhs2) == SSA_NAME
                      && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
                      && gimple_assign_single_p (def_stmt)
                      && (first = uniform_vector_p
                            (gimple_assign_rhs1 (def_stmt))) != NULL_TREE)))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
          else
            op = optab_for_tree_code (code, type, optab_vector);
        }

      /* Try for a vector/scalar shift, and if we don't have one, see if
         we have a vector/vector shift.  */
      else if (!vector_scalar_shift)
        {
          op = optab_for_tree_code (code, type, optab_vector);

          if (op && (optab_handler (op, TYPE_MODE (type))
                     != CODE_FOR_nothing))
            {
              /* Transform vector <op> scalar => vector <op> {x,x,x,x}.  */
              int n_parts = TYPE_VECTOR_SUBPARTS (type);
              int part_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
              tree part_type = lang_hooks.types.type_for_size (part_size, 1);
              tree vect_type = build_vector_type (part_type, n_parts);

              rhs2 = fold_convert (part_type, rhs2);
              rhs2 = build_vector_from_val (vect_type, rhs2);
              gimple_assign_set_rhs2 (stmt, rhs2);
              update_stmt (stmt);
            }
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     calculated in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR)
    type = TREE_TYPE (rhs1);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (TYPE_MODE (type) == BLKmode && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
                                       TYPE_SATURATING (TREE_TYPE (type)));
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type)))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM)
          && op != NULL
          && optab_handler (op, compute_mode) != CODE_FOR_nothing)
        return;
      else
        /* There is no operation in hardware, so fall back to scalars.  */
        compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
\f
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations_ssa (void)
{
  return optimize == 0;
}

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          /* ???  If we do not cleanup EH then we will ICE in
             verification.  But in reality we have created wrong-code
             as we did not properly transition EH info and edges to
             the piecewise computations.  */
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
              && gimple_purge_dead_eh_edges (bb))
            cfg_changed = true;
        }
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",                           /* name */
  gate_expand_vector_operations_ssa,    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",                          /* name */
  0,                                    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"