/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm.h"
#include "langhooks.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "diagnostic.h"
#include "target.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
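/* As an illustrative example (not part of the original comment): with
   a 32-bit TYPE, an 8-bit INNER_TYPE and VALUE == 0x7f, the replication
   below produces the constant 0x7f7f7f7f.  */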
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
	  / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}
/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0

   INNER_TYPE is the type of A's and B's elements.

   The returned expression is a signed integer whose size equals
   the size of INNER_TYPE.  */
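/* For example (an illustrative case, not from the original comment):
   with 32-bit integer elements and CODE == LT_EXPR, comparing
   elements 3 and 7 yields (int) -1, i.e. the bit pattern 0xffffffff,
   while comparing 7 and 3 yields 0.  */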
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
    (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
	   (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
	   (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
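/* To check the addition identity above on one concrete (illustrative)
   byte: for a == 0xff and b == 0x01 we get
   (0xff & 0x7f) + (0x01 & 0x7f) == 0x80 and
   ((0xff ^ 0x01) & 0x80) == 0x80, so the result is 0x80 ^ 0x80 == 0x00,
   which matches 0xff + 0x01 modulo 256, with no carry leaking into the
   neighbouring byte of the word.  */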
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab whether the expression
     VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers of
     two, in which case the operation can be expanded using shifts (or,
     for remainder, masking).  Additionally compute the multiplicative
     constants and pre and post shifts if the division is to be
     expanded using widening or high part multiplication plus shifts.  */
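  /* As a concrete illustration (not from the original comment): for
     unsigned 32-bit division by 3, choose_multiplier yields the
     multiplier 0xaaaaaaab with post_shift == 1, so each element is
     computed as q = (n h* 0xaaaaaaab) >> 1, where h* denotes taking
     the high part of the widening multiplication.  */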
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = floor_log2 (d & -d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		   ? (unsigned HOST_WIDE_INT) d
		   : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type))
	    {
	      tree zero, cst, cond;
	      gimple stmt;

	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					((unsigned HOST_WIDE_INT) 1
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type, NULL);
	      stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
						   cond, cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    ((unsigned HOST_WIDE_INT) 1
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
	  aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gimple assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  cst = XALLOCAVEC (tree, nelts);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      cst[i] = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
				cst[i], gimple_assign_rhs2 (g));
	  if (cst[i] == NULL_TREE
	      || TREE_CODE (cst[i]) != INTEGER_CST
	      || TREE_OVERFLOW (cst[i]))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type, NULL), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, gimple_assign_lhs (g),
				    build_vector (type, cst));
  gsi_replace (gsi, g, false);
}
\f
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}

/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will
   be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of arguments.  */
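/* Illustrative example (not part of the original comment):
   VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,4,1,5}> indexes into the
   concatenation of the two input vectors and yields {a,e,b,f}.  */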

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Return type in which CODE operation with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}

/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (CONVERT_EXPR_CODE_P (code)
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is that
     of the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR
     was handled in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate by
     scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
	 could be a scalar variable or a constant.  Transform
	 vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	{
	  tree first;
	  gimple def_stmt;

	  if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
	    {
	      gimple_assign_set_rhs2 (stmt, first);
	      update_stmt (stmt);
	      rhs2 = first;
	    }
	}

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
	  op = optab_for_tree_code (code, type, optab_scalar);

	  compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Pick one with wider vector type.  */
	  tree compute_vtype = get_compute_type (code, opv, type);
	  if (count_type_subparts (compute_vtype)
	      > count_type_subparts (compute_type))
	    {
	      compute_type = compute_vtype;
	      op = opv;
	    }
	}

      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
	{
	  if (compute_type == NULL_TREE)
	    compute_type = get_compute_type (code, op, type);
	  if (compute_type == type)
	    return;
	  /* Before splitting vector rotates into scalar rotates,
	     see if we can't use vector shifts and BIT_IOR_EXPR
	     instead.  For vector by vector rotates we'd also
	     need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
	     for now, fold doesn't seem to create such rotates anyway.  */
	  if (compute_type == TREE_TYPE (type)
	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	    {
	      optab oplv = vashl_optab, opl = ashl_optab;
	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
	      /* The rtl expander will expand vector/scalar as vector/vector
		 if necessary.  Pick one with wider vector type.  */
	      if (count_type_subparts (compute_lvtype)
		  > count_type_subparts (compute_ltype))
		{
		  compute_ltype = compute_lvtype;
		  opl = oplv;
		}
	      if (count_type_subparts (compute_rvtype)
		  > count_type_subparts (compute_rtype))
		{
		  compute_rtype = compute_rvtype;
		  opr = oprv;
		}
	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
		 BIT_IOR_EXPR.  */
	      compute_type = compute_ltype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_rtype))
		compute_type = compute_rtype;
	      if (count_type_subparts (compute_type)
		  > count_type_subparts (compute_otype))
		compute_type = compute_otype;
	      /* Verify all 3 operations can be performed in that type.  */
	      if (compute_type != TREE_TYPE (type))
		{
		  if (optab_handler (opl, TYPE_MODE (compute_type))
		      == CODE_FOR_nothing
		      || optab_handler (opr, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing
		      || optab_handler (opo, TYPE_MODE (compute_type))
			 == CODE_FOR_nothing)
		    compute_type = TREE_TYPE (type);
		}
	    }
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  if (compute_type == NULL_TREE)
    compute_type = get_compute_type (code, op, type);
  if (compute_type == type)
    return;

  gcc_assert (code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
\f
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_lower_vector =
{
  GIMPLE_PASS, /* type */
  "veclower", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector : public gimple_opt_pass
{
public:
  pass_lower_vector (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lvec);
    }

  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector (gcc::context *ctxt)
{
  return new pass_lower_vector (ctxt);
}

namespace {

const pass_data pass_data_lower_vector_ssa =
{
  GIMPLE_PASS, /* type */
  "veclower2", /* name */
  OPTGROUP_VEC, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lvec, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa
    | TODO_cleanup_cfg ), /* todo_flags_finish */
};

class pass_lower_vector_ssa : public gimple_opt_pass
{
public:
  pass_lower_vector_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return expand_vector_operations ();
    }

}; // class pass_lower_vector_ssa

} // anon namespace

gimple_opt_pass *
make_pass_lower_vector_ssa (gcc::context *ctxt)
{
  return new pass_lower_vector_ssa (ctxt);
}

#include "gt-tree-vect-generic.h"