1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
28 #include "gimple.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
31 #include "flags.h"
32 #include "ggc.h"
33 #include "diagnostic.h"
34 #include "target.h"
35
36 /* Need to include rtl.h, expr.h, etc. for optabs. */
37 #include "expr.h"
38 #include "optabs.h"
39
40
41 static void expand_vector_operations_1 (gimple_stmt_iterator *);
42
43
44 /* Build a constant of type TYPE, made of VALUE's bits replicated
45 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
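/* As a concrete illustration: with an 8-bit INNER_TYPE, a 32-bit TYPE
   and VALUE == 0x7f, the low 8 bits of VALUE are copied into every
   byte lane and the function returns the constant 0x7f7f7f7f.  */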
46 static tree
47 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
48 {
49 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
50 int n = HOST_BITS_PER_WIDE_INT / width;
51 unsigned HOST_WIDE_INT low, high, mask;
52 tree ret;
53
54 gcc_assert (n);
55
56 if (width == HOST_BITS_PER_WIDE_INT)
57 low = value;
58 else
59 {
60 mask = ((HOST_WIDE_INT)1 << width) - 1;
61 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
62 }
63
64 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
65 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
66 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
67 high = 0;
68 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
69 high = low;
70 else
71 gcc_unreachable ();
72
73 ret = build_int_cst_wide (type, low, high);
74 return ret;
75 }
76
77 static GTY(()) tree vector_inner_type;
78 static GTY(()) tree vector_last_type;
79 static GTY(()) int vector_last_nunits;
80
81 /* Return a suitable vector type made of NUNITS units, each of mode
82 "word_mode" (the global variable). */
83 static tree
84 build_word_mode_vector_type (int nunits)
85 {
86 if (!vector_inner_type)
87 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
88 else if (vector_last_nunits == nunits)
89 {
90 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
91 return vector_last_type;
92 }
93
94 /* We build a new type, but we canonicalize it nevertheless,
95 because it still saves some memory. */
96 vector_last_nunits = nunits;
97 vector_last_type = type_hash_canon (nunits,
98 build_vector_type (vector_inner_type,
99 nunits));
100 return vector_last_type;
101 }
102
103 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
104 tree, tree, tree, tree, tree, enum tree_code);
105
106 static inline tree
107 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
108 tree t, tree bitsize, tree bitpos)
109 {
110 if (bitpos)
111 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
112 else
113 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
114 }
115
116 static tree
117 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
118 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
119 enum tree_code code)
120 {
121 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
122 return gimplify_build1 (gsi, code, inner_type, a);
123 }
124
125 static tree
126 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
127 tree bitpos, tree bitsize, enum tree_code code)
128 {
129 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
130 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
131 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
132 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
133 return gimplify_build2 (gsi, code, inner_type, a, b);
134 }
135
136 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
137
138 INNER_TYPE is the type of A's and B's elements.
139
140 The returned expression is of signed integer type with the
141 same size as INNER_TYPE. */
142 static tree
143 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
144 tree bitpos, tree bitsize, enum tree_code code)
145 {
146 tree comp_type;
147
148 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
149 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
150
151 comp_type = build_nonstandard_integer_type
152 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
153
154 return gimplify_build3 (gsi, COND_EXPR, comp_type,
155 fold_build2 (code, boolean_type_node, a, b),
156 build_int_cst (comp_type, -1),
157 build_int_cst (comp_type, 0));
158 }
159
160 /* Expand vector addition to scalars. This does bit twiddling
161 in order to increase parallelism:
162
163 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
164 (a ^ b) & 0x80808080
165
166 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
167 (a ^ ~b) & 0x80808080
168
169 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
170
171 This optimization should be done only if 4 vector items or more
172 fit into a word. */
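/* To see why the addition formula works, consider two V4QI vectors viewed
   as one 32-bit word each.  (a & 0x7f7f7f7f) + (b & 0x7f7f7f7f) adds the
   low seven bits of every byte in parallel; because the top bit of each
   byte was cleared first, no carry can spill into the neighbouring byte.
   The top bit of each byte of the true sum is that partial sum's top bit
   XORed with the two operand sign bits, which is exactly what the final
   "^ (a ^ b) & 0x80808080" term restores.  */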
173 static tree
174 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
175 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
176 enum tree_code code)
177 {
178 tree inner_type = TREE_TYPE (TREE_TYPE (a));
179 unsigned HOST_WIDE_INT max;
180 tree low_bits, high_bits, a_low, b_low, result_low, signs;
181
182 max = GET_MODE_MASK (TYPE_MODE (inner_type));
183 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
184 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
185
186 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
187 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
188
189 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
190 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
191 if (code == PLUS_EXPR)
192 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
193 else
194 {
195 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
196 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
197 }
198
199 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
200 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
201 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
202 }
203
204 static tree
205 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
206 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
207 tree bitsize ATTRIBUTE_UNUSED,
208 enum tree_code code ATTRIBUTE_UNUSED)
209 {
210 tree inner_type = TREE_TYPE (TREE_TYPE (b));
211 HOST_WIDE_INT max;
212 tree low_bits, high_bits, b_low, result_low, signs;
213
214 max = GET_MODE_MASK (TYPE_MODE (inner_type));
215 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
216 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
217
218 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
219
220 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
221 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
222 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
223 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
224 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
225 }
226
227 /* Expand a vector operation to scalars by using many operations
228 whose type is the vector type's inner type. */
229 static tree
230 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
231 tree type, tree inner_type,
232 tree a, tree b, enum tree_code code)
233 {
234 VEC(constructor_elt,gc) *v;
235 tree part_width = TYPE_SIZE (inner_type);
236 tree index = bitsize_int (0);
237 int nunits = TYPE_VECTOR_SUBPARTS (type);
238 int delta = tree_low_cst (part_width, 1)
239 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
240 int i;
241 location_t loc = gimple_location (gsi_stmt (*gsi));
242
243 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
244 warning_at (loc, OPT_Wvector_operation_performance,
245 "vector operation will be expanded piecewise");
246 else
247 warning_at (loc, OPT_Wvector_operation_performance,
248 "vector operation will be expanded in parallel");
249
250 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
251 for (i = 0; i < nunits;
252 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
253 {
254 tree result = f (gsi, inner_type, a, b, index, part_width, code);
255 constructor_elt ce = {NULL_TREE, result};
256 VEC_quick_push (constructor_elt, v, ce);
257 }
258
259 return build_constructor (type, v);
260 }
261
262 /* Expand a vector operation to scalars with the freedom to use
263 a scalar integer type, or to use a different size for the items
264 in the vector type. */
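/* For example, a V4SI bitwise AND on a 64-bit target that lacks a vector
   AND instruction can be carried out as two DImode scalar ANDs, one per
   word of the vector.  */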
265 static tree
266 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
267 tree a, tree b,
268 enum tree_code code)
269 {
270 tree result, compute_type;
271 enum machine_mode mode;
272 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
273 location_t loc = gimple_location (gsi_stmt (*gsi));
274
275 /* We have three strategies. If the type is already correct, just do
276 the operation an element at a time. Else, if the vector is wider than
277 one word, do it a word at a time; finally, if the vector is smaller
278 than one word, do it as a scalar. */
279 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
280 return expand_vector_piecewise (gsi, f,
281 type, TREE_TYPE (type),
282 a, b, code);
283 else if (n_words > 1)
284 {
285 tree word_type = build_word_mode_vector_type (n_words);
286 result = expand_vector_piecewise (gsi, f,
287 word_type, TREE_TYPE (word_type),
288 a, b, code);
289 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
290 GSI_SAME_STMT);
291 }
292 else
293 {
294 /* Use a single scalar operation with a mode no wider than word_mode. */
295 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
296 compute_type = lang_hooks.types.type_for_mode (mode, 1);
297 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
298 warning_at (loc, OPT_Wvector_operation_performance,
299 "vector operation will be expanded with a "
300 "single scalar operation");
301 }
302
303 return result;
304 }
305
306 /* Expand a vector operation to scalars; for integer types we can use
307 special bit twiddling tricks to do the sums a word at a time, using
308 function F_PARALLEL instead of F. These tricks are done only if
309 they can process at least four items, that is, only if the vector
310 holds at least four items and if a word can hold four items. */
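/* For example, on a target with 64-bit words a V8QI addition meets both
   conditions (eight parts per word, eight subparts) and is expanded with
   the bit-twiddling tricks above, whereas a V2SI addition has only two
   subparts and is expanded one element at a time.  */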
311 static tree
312 expand_vector_addition (gimple_stmt_iterator *gsi,
313 elem_op_func f, elem_op_func f_parallel,
314 tree type, tree a, tree b, enum tree_code code)
315 {
316 int parts_per_word = UNITS_PER_WORD
317 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
318
319 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
320 && parts_per_word >= 4
321 && TYPE_VECTOR_SUBPARTS (type) >= 4)
322 return expand_vector_parallel (gsi, f_parallel,
323 type, a, b, code);
324 else
325 return expand_vector_piecewise (gsi, f,
326 type, TREE_TYPE (type),
327 a, b, code);
328 }
329
330 /* Check whether all elements of vector VEC are equal and whether
331 the number of elements corresponds to the type of VEC.
332 The function returns the first element of the vector,
333 or NULL_TREE if the vector is not uniform. */
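/* For example, the constant vector {4, 4, 4, 4} yields the element 4,
   whereas {0, 1, 0, 1} yields NULL_TREE.  */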
334 static tree
335 uniform_vector_p (tree vec)
336 {
337 tree first, t;
338 unsigned i;
339
340 if (vec == NULL_TREE)
341 return NULL_TREE;
342
343 if (TREE_CODE (vec) == VECTOR_CST)
344 {
345 first = VECTOR_CST_ELT (vec, 0);
346 for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
347 if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
348 return NULL_TREE;
349
350 return first;
351 }
352
353 else if (TREE_CODE (vec) == CONSTRUCTOR)
354 {
355 first = error_mark_node;
356
357 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
358 {
359 if (i == 0)
360 {
361 first = t;
362 continue;
363 }
364 if (!operand_equal_p (first, t, 0))
365 return NULL_TREE;
366 }
367 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
368 return NULL_TREE;
369
370 return first;
371 }
372
373 return NULL_TREE;
374 }
375
376 /* Try to expand the vector comparison expression OP0 CODE OP1 by
377 querying the optab whether the following expression:
378 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
379 can be expanded; if it cannot, lower the comparison piecewise. */
380 static tree
381 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
382 tree op1, enum tree_code code)
383 {
384 tree t;
385 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
386 t = expand_vector_piecewise (gsi, do_compare, type,
387 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
388 else
389 t = NULL_TREE;
390
391 return t;
392 }
393
394 /* Helper function of expand_vector_divmod. Gimplify an RSHIFT_EXPR in the
395 type of OP0 with the shift counts in the SHIFTCNTS array and return the
396 temporary holding the result if successful, otherwise return NULL_TREE. */
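/* For instance, with SHIFTCNTS == {3, 3, 3, 3} and a target providing a
   vector-by-scalar right shift, this emits a single "op0 >> 3" statement;
   if only shifts by a whole vector of counts exist, the counts are put
   into a VECTOR_CST operand instead.  */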
397 static tree
398 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
399 {
400 optab op;
401 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
402 bool scalar_shift = true;
403
404 for (i = 1; i < nunits; i++)
405 {
406 if (shiftcnts[i] != shiftcnts[0])
407 scalar_shift = false;
408 }
409
410 if (scalar_shift && shiftcnts[0] == 0)
411 return op0;
412
413 if (scalar_shift)
414 {
415 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
416 if (op != unknown_optab
417 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
418 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
419 build_int_cst (NULL_TREE, shiftcnts[0]));
420 }
421
422 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
423 if (op != unknown_optab
424 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
425 {
426 tree *vec = XALLOCAVEC (tree, nunits);
427 for (i = 0; i < nunits; i++)
428 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
429 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
430 build_vector (type, vec));
431 }
432
433 return NULL_TREE;
434 }
435
436 /* Try to expand integer vector division by constant using
437 widening multiply, shifts and additions. */
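/* For example, an unsigned V4SI division by {3, 3, 3, 3} can be emitted as
     t1 = op0 h* {0xaaaaaaab, 0xaaaaaaab, 0xaaaaaaab, 0xaaaaaaab};
     q  = t1 >> 1;
   because (x * 0xaaaaaaab) >> 33 == x / 3 for every 32-bit unsigned x.  */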
438 static tree
439 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
440 tree op1, enum tree_code code)
441 {
442 bool use_pow2 = true;
443 bool has_vector_shift = true;
444 int mode = -1, this_mode;
445 int pre_shift = -1, post_shift;
446 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
447 int *shifts = XALLOCAVEC (int, nunits * 4);
448 int *pre_shifts = shifts + nunits;
449 int *post_shifts = pre_shifts + nunits;
450 int *shift_temps = post_shifts + nunits;
451 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
452 int prec = TYPE_PRECISION (TREE_TYPE (type));
453 int dummy_int;
454 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
455 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
456 tree *vec;
457 tree cur_op, mulcst, tem;
458 optab op;
459
460 if (prec > HOST_BITS_PER_WIDE_INT)
461 return NULL_TREE;
462
463 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
464 if (op == unknown_optab
465 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
466 has_vector_shift = false;
467
468 /* Analysis phase. Determine whether all op1 elements are powers
469 of two, in which case the operation can be expanded using shifts (or,
470 for remainder, using masking). Additionally compute the multiplicative
471 constants and the pre- and post-shifts if the division is to be expanded
472 using widening or high-part multiplication plus shifts. */
473 for (i = 0; i < nunits; i++)
474 {
475 tree cst = VECTOR_CST_ELT (op1, i);
476 unsigned HOST_WIDE_INT ml;
477
478 if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
479 return NULL_TREE;
480 pre_shifts[i] = 0;
481 post_shifts[i] = 0;
482 mulc[i] = 0;
483 if (use_pow2
484 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
485 use_pow2 = false;
486 if (use_pow2)
487 {
488 shifts[i] = tree_log2 (cst);
489 if (shifts[i] != shifts[0]
490 && code == TRUNC_DIV_EXPR
491 && !has_vector_shift)
492 use_pow2 = false;
493 }
494 if (mode == -2)
495 continue;
496 if (unsignedp)
497 {
498 unsigned HOST_WIDE_INT mh;
499 unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
500
501 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
502 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
503 return NULL_TREE;
504
505 if (d <= 1)
506 {
507 mode = -2;
508 continue;
509 }
510
511 /* Find a suitable multiplier and right shift count
512 instead of multiplying with D. */
513 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
514
515 /* If the suggested multiplier is more than SIZE bits, we can
516 do better for even divisors, using an initial right shift. */
517 if ((mh != 0 && (d & 1) == 0)
518 || (!has_vector_shift && pre_shift != -1))
519 {
520 if (has_vector_shift)
521 pre_shift = floor_log2 (d & -d);
522 else if (pre_shift == -1)
523 {
524 unsigned int j;
525 for (j = 0; j < nunits; j++)
526 {
527 tree cst2 = VECTOR_CST_ELT (op1, j);
528 unsigned HOST_WIDE_INT d2;
529 int this_pre_shift;
530
531 if (!host_integerp (cst2, 1))
532 return NULL_TREE;
533 d2 = tree_low_cst (cst2, 1) & mask;
534 if (d2 == 0)
535 return NULL_TREE;
536 this_pre_shift = floor_log2 (d2 & -d2);
537 if (pre_shift == -1 || this_pre_shift < pre_shift)
538 pre_shift = this_pre_shift;
539 }
540 if (i != 0 && pre_shift != 0)
541 {
542 /* Restart. */
543 i = -1U;
544 mode = -1;
545 continue;
546 }
547 }
548 if (pre_shift != 0)
549 {
550 if ((d >> pre_shift) <= 1)
551 {
552 mode = -2;
553 continue;
554 }
555 mh = choose_multiplier (d >> pre_shift, prec,
556 prec - pre_shift,
557 &ml, &post_shift, &dummy_int);
558 gcc_assert (!mh);
559 pre_shifts[i] = pre_shift;
560 }
561 }
562 if (!mh)
563 this_mode = 0;
564 else
565 this_mode = 1;
566 }
567 else
568 {
569 HOST_WIDE_INT d = tree_low_cst (cst, 0);
570 unsigned HOST_WIDE_INT abs_d;
571
572 if (d == -1)
573 return NULL_TREE;
574
575 /* Since d might be INT_MIN, we have to cast to
576 unsigned HOST_WIDE_INT before negating to avoid
577 undefined signed overflow. */
578 abs_d = (d >= 0
579 ? (unsigned HOST_WIDE_INT) d
580 : - (unsigned HOST_WIDE_INT) d);
581
582 /* n rem d = n rem -d */
583 if (code == TRUNC_MOD_EXPR && d < 0)
584 d = abs_d;
585 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
586 {
587 /* This case is not handled correctly below. */
588 mode = -2;
589 continue;
590 }
591 if (abs_d <= 1)
592 {
593 mode = -2;
594 continue;
595 }
596
597 choose_multiplier (abs_d, prec, prec - 1, &ml,
598 &post_shift, &dummy_int);
599 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
600 {
601 this_mode = 4 + (d < 0);
602 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
603 }
604 else
605 this_mode = 2 + (d < 0);
606 }
607 mulc[i] = ml;
608 post_shifts[i] = post_shift;
609 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
610 || post_shift >= prec
611 || pre_shifts[i] >= prec)
612 this_mode = -2;
613
614 if (i == 0)
615 mode = this_mode;
616 else if (mode != this_mode)
617 mode = -2;
618 }
619
620 vec = XALLOCAVEC (tree, nunits);
621
622 if (use_pow2)
623 {
624 tree addend = NULL_TREE;
625 if (!unsignedp)
626 {
627 tree uns_type;
628
629 /* Both division and remainder sequences need
630 op0 < 0 ? mask : 0 computed. It can be either computed as
631 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
632 if none of the shifts is 0, or as the conditional. */
633 for (i = 0; i < nunits; i++)
634 if (shifts[i] == 0)
635 break;
636 uns_type
637 = build_vector_type (build_nonstandard_integer_type (prec, 1),
638 nunits);
639 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
640 {
641 for (i = 0; i < nunits; i++)
642 shift_temps[i] = prec - 1;
643 cur_op = add_rshift (gsi, type, op0, shift_temps);
644 if (cur_op != NULL_TREE)
645 {
646 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
647 uns_type, cur_op);
648 for (i = 0; i < nunits; i++)
649 shift_temps[i] = prec - shifts[i];
650 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
651 if (cur_op != NULL_TREE)
652 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
653 type, cur_op);
654 }
655 }
656 if (addend == NULL_TREE
657 && expand_vec_cond_expr_p (type, type))
658 {
659 tree zero, cst, cond;
660 gimple stmt;
661
662 zero = build_zero_cst (type);
663 cond = build2 (LT_EXPR, type, op0, zero);
664 for (i = 0; i < nunits; i++)
665 vec[i] = build_int_cst (TREE_TYPE (type),
666 ((unsigned HOST_WIDE_INT) 1
667 << shifts[i]) - 1);
668 cst = build_vector (type, vec);
669 addend = make_ssa_name (type, NULL);
670 stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
671 cond, cst, zero);
672 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
673 }
674 }
675 if (code == TRUNC_DIV_EXPR)
676 {
677 if (unsignedp)
678 {
679 /* q = op0 >> shift; */
680 cur_op = add_rshift (gsi, type, op0, shifts);
681 if (cur_op != NULL_TREE)
682 return cur_op;
683 }
684 else if (addend != NULL_TREE)
685 {
686 /* t1 = op0 + addend;
687 q = t1 >> shift; */
688 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
689 if (op != unknown_optab
690 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
691 {
692 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
693 cur_op = add_rshift (gsi, type, cur_op, shifts);
694 if (cur_op != NULL_TREE)
695 return cur_op;
696 }
697 }
698 }
699 else
700 {
701 tree mask;
702 for (i = 0; i < nunits; i++)
703 vec[i] = build_int_cst (TREE_TYPE (type),
704 ((unsigned HOST_WIDE_INT) 1
705 << shifts[i]) - 1);
706 mask = build_vector (type, vec);
707 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
708 if (op != unknown_optab
709 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
710 {
711 if (unsignedp)
712 /* r = op0 & mask; */
713 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
714 else if (addend != NULL_TREE)
715 {
716 /* t1 = op0 + addend;
717 t2 = t1 & mask;
718 r = t2 - addend; */
719 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
720 if (op != unknown_optab
721 && optab_handler (op, TYPE_MODE (type))
722 != CODE_FOR_nothing)
723 {
724 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
725 addend);
726 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
727 cur_op, mask);
728 op = optab_for_tree_code (MINUS_EXPR, type,
729 optab_default);
730 if (op != unknown_optab
731 && optab_handler (op, TYPE_MODE (type))
732 != CODE_FOR_nothing)
733 return gimplify_build2 (gsi, MINUS_EXPR, type,
734 cur_op, addend);
735 }
736 }
737 }
738 }
739 }
740
741 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
742 return NULL_TREE;
743
744 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
745 return NULL_TREE;
746
747 cur_op = op0;
748
749 switch (mode)
750 {
751 case 0:
752 gcc_assert (unsignedp);
753 /* t1 = oprnd0 >> pre_shift;
754 t2 = t1 h* ml;
755 q = t2 >> post_shift; */
756 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
757 if (cur_op == NULL_TREE)
758 return NULL_TREE;
759 break;
760 case 1:
761 gcc_assert (unsignedp);
762 for (i = 0; i < nunits; i++)
763 {
764 shift_temps[i] = 1;
765 post_shifts[i]--;
766 }
767 break;
768 case 2:
769 case 3:
770 case 4:
771 case 5:
772 gcc_assert (!unsignedp);
773 for (i = 0; i < nunits; i++)
774 shift_temps[i] = prec - 1;
775 break;
776 default:
777 return NULL_TREE;
778 }
779
780 for (i = 0; i < nunits; i++)
781 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
782 mulcst = build_vector (type, vec);
783
784 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
785
786 switch (mode)
787 {
788 case 0:
789 /* t1 = oprnd0 >> pre_shift;
790 t2 = t1 h* ml;
791 q = t2 >> post_shift; */
792 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
793 break;
794 case 1:
795 /* t1 = oprnd0 h* ml;
796 t2 = oprnd0 - t1;
797 t3 = t2 >> 1;
798 t4 = t1 + t3;
799 q = t4 >> (post_shift - 1); */
800 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
801 if (op == unknown_optab
802 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
803 return NULL_TREE;
804 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
805 tem = add_rshift (gsi, type, tem, shift_temps);
806 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
807 if (op == unknown_optab
808 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
809 return NULL_TREE;
810 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
811 cur_op = add_rshift (gsi, type, tem, post_shifts);
812 if (cur_op == NULL_TREE)
813 return NULL_TREE;
814 break;
815 case 2:
816 case 3:
817 case 4:
818 case 5:
819 /* t1 = oprnd0 h* ml;
820 t2 = t1; [ iff (mode & 2) != 0 ]
821 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
822 t3 = t2 >> post_shift;
823 t4 = oprnd0 >> (prec - 1);
824 q = t3 - t4; [ iff (mode & 1) == 0 ]
825 q = t4 - t3; [ iff (mode & 1) != 0 ] */
826 if ((mode & 2) == 0)
827 {
828 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
829 if (op == unknown_optab
830 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
831 return NULL_TREE;
832 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
833 }
834 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
835 if (cur_op == NULL_TREE)
836 return NULL_TREE;
837 tem = add_rshift (gsi, type, op0, shift_temps);
838 if (tem == NULL_TREE)
839 return NULL_TREE;
840 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
841 if (op == unknown_optab
842 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
843 return NULL_TREE;
844 if ((mode & 1) == 0)
845 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
846 else
847 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
848 break;
849 default:
850 gcc_unreachable ();
851 }
852
853 if (code == TRUNC_DIV_EXPR)
854 return cur_op;
855
856 /* We divided. Now finish by:
857 t1 = q * oprnd1;
858 r = oprnd0 - t1; */
859 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
860 if (op == unknown_optab
861 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
862 return NULL_TREE;
863 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
864 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
865 if (op == unknown_optab
866 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
867 return NULL_TREE;
868 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
869 }
870
871 static tree
872 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
873 gimple assign, enum tree_code code)
874 {
875 enum machine_mode compute_mode = TYPE_MODE (compute_type);
876
877 /* If the compute mode is not a vector mode (hence we are not decomposing
878 a BLKmode vector to smaller, hardware-supported vectors), we may want
879 to expand the operations in parallel. */
880 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
881 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
882 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
883 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
884 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
885 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
886 switch (code)
887 {
888 case PLUS_EXPR:
889 case MINUS_EXPR:
890 if (!TYPE_OVERFLOW_TRAPS (type))
891 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
892 gimple_assign_rhs1 (assign),
893 gimple_assign_rhs2 (assign), code);
894 break;
895
896 case NEGATE_EXPR:
897 if (!TYPE_OVERFLOW_TRAPS (type))
898 return expand_vector_addition (gsi, do_unop, do_negate, type,
899 gimple_assign_rhs1 (assign),
900 NULL_TREE, code);
901 break;
902
903 case BIT_AND_EXPR:
904 case BIT_IOR_EXPR:
905 case BIT_XOR_EXPR:
906 return expand_vector_parallel (gsi, do_binop, type,
907 gimple_assign_rhs1 (assign),
908 gimple_assign_rhs2 (assign), code);
909
910 case BIT_NOT_EXPR:
911 return expand_vector_parallel (gsi, do_unop, type,
912 gimple_assign_rhs1 (assign),
913 NULL_TREE, code);
914 case EQ_EXPR:
915 case NE_EXPR:
916 case GT_EXPR:
917 case LT_EXPR:
918 case GE_EXPR:
919 case LE_EXPR:
920 case UNEQ_EXPR:
921 case UNGT_EXPR:
922 case UNLT_EXPR:
923 case UNGE_EXPR:
924 case UNLE_EXPR:
925 case LTGT_EXPR:
926 case ORDERED_EXPR:
927 case UNORDERED_EXPR:
928 {
929 tree rhs1 = gimple_assign_rhs1 (assign);
930 tree rhs2 = gimple_assign_rhs2 (assign);
931
932 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
933 }
934
935 case TRUNC_DIV_EXPR:
936 case TRUNC_MOD_EXPR:
937 {
938 tree rhs1 = gimple_assign_rhs1 (assign);
939 tree rhs2 = gimple_assign_rhs2 (assign);
940 tree ret;
941
942 if (!optimize
943 || !VECTOR_INTEGER_TYPE_P (type)
944 || TREE_CODE (rhs2) != VECTOR_CST)
945 break;
946
947 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
948 if (ret != NULL_TREE)
949 return ret;
950 break;
951 }
952
953 default:
954 break;
955 }
956
957 if (TREE_CODE_CLASS (code) == tcc_unary)
958 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
959 gimple_assign_rhs1 (assign),
960 NULL_TREE, code);
961 else
962 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
963 gimple_assign_rhs1 (assign),
964 gimple_assign_rhs2 (assign), code);
965 }
966 \f
967 /* Return a type for the widest vector mode whose components are of type
968 TYPE and which is supported by optab OP, or NULL_TREE if none is found. */
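/* For instance, if TYPE is a 32-bit float and the target implements OP for
   both V4SF and V8SF, the V8SF type is returned, since it has the larger
   number of units.  */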
969
970 static tree
971 type_for_widest_vector_mode (tree type, optab op)
972 {
973 enum machine_mode inner_mode = TYPE_MODE (type);
974 enum machine_mode best_mode = VOIDmode, mode;
975 int best_nunits = 0;
976
977 if (SCALAR_FLOAT_MODE_P (inner_mode))
978 mode = MIN_MODE_VECTOR_FLOAT;
979 else if (SCALAR_FRACT_MODE_P (inner_mode))
980 mode = MIN_MODE_VECTOR_FRACT;
981 else if (SCALAR_UFRACT_MODE_P (inner_mode))
982 mode = MIN_MODE_VECTOR_UFRACT;
983 else if (SCALAR_ACCUM_MODE_P (inner_mode))
984 mode = MIN_MODE_VECTOR_ACCUM;
985 else if (SCALAR_UACCUM_MODE_P (inner_mode))
986 mode = MIN_MODE_VECTOR_UACCUM;
987 else
988 mode = MIN_MODE_VECTOR_INT;
989
990 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
991 if (GET_MODE_INNER (mode) == inner_mode
992 && GET_MODE_NUNITS (mode) > best_nunits
993 && optab_handler (op, mode) != CODE_FOR_nothing)
994 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
995
996 if (best_mode == VOIDmode)
997 return NULL_TREE;
998 else
999 return build_vector_type_for_mode (type, best_mode);
1000 }
1001
1002
1003 /* Build a reference to the element of the vector VECT. The function
1004 returns either the element itself, a BIT_FIELD_REF, or an
1005 ARRAY_REF expression.
1006
1007 GSI is required to insert temporary variables while building a
1008 reference to the element of the vector VECT.
1009
1010 PTMPVEC is a pointer to the temporary variable used for caching
1011 purposes. If PTMPVEC is NULL, a new temporary variable
1012 will be created. */
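/* For example, asking for element 2 of the VECTOR_CST {1, 2, 3, 4} simply
   returns the constant 3, while for a variable index the vector is spilled
   to an addressable temporary and an ARRAY_REF into it is built.  */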
1013 static tree
1014 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1015 {
1016 tree vect_type, vect_elt_type;
1017 gimple asgn;
1018 tree tmpvec;
1019 tree arraytype;
1020 bool need_asgn = true;
1021 unsigned int elements;
1022
1023 vect_type = TREE_TYPE (vect);
1024 vect_elt_type = TREE_TYPE (vect_type);
1025 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1026
1027 if (TREE_CODE (idx) == INTEGER_CST)
1028 {
1029 unsigned HOST_WIDE_INT index;
1030
1031 /* Given that we're about to compute a binary modulus,
1032 we don't care about the high bits of the value. */
1033 index = TREE_INT_CST_LOW (idx);
1034 if (!host_integerp (idx, 1) || index >= elements)
1035 {
1036 index &= elements - 1;
1037 idx = build_int_cst (TREE_TYPE (idx), index);
1038 }
1039
1040 /* When lowering a vector statement sequence do some easy
1041 simplification by looking through intermediate vector results. */
1042 if (TREE_CODE (vect) == SSA_NAME)
1043 {
1044 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1045 if (is_gimple_assign (def_stmt)
1046 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1047 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1048 vect = gimple_assign_rhs1 (def_stmt);
1049 }
1050
1051 if (TREE_CODE (vect) == VECTOR_CST)
1052 return VECTOR_CST_ELT (vect, index);
1053 else if (TREE_CODE (vect) == CONSTRUCTOR
1054 && (CONSTRUCTOR_NELTS (vect) == 0
1055 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1056 != VECTOR_TYPE))
1057 {
1058 if (index < CONSTRUCTOR_NELTS (vect))
1059 return CONSTRUCTOR_ELT (vect, index)->value;
1060 return build_zero_cst (vect_elt_type);
1061 }
1062 else
1063 {
1064 tree size = TYPE_SIZE (vect_elt_type);
1065 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1066 size);
1067 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1068 }
1069 }
1070
1071 if (!ptmpvec)
1072 tmpvec = create_tmp_var (vect_type, "vectmp");
1073 else if (!*ptmpvec)
1074 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1075 else
1076 {
1077 tmpvec = *ptmpvec;
1078 need_asgn = false;
1079 }
1080
1081 if (need_asgn)
1082 {
1083 TREE_ADDRESSABLE (tmpvec) = 1;
1084 asgn = gimple_build_assign (tmpvec, vect);
1085 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1086 }
1087
1088 arraytype = build_array_type_nelts (vect_elt_type, elements);
1089 return build4 (ARRAY_REF, vect_elt_type,
1090 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1091 idx, NULL_TREE, NULL_TREE);
1092 }
1093
1094 /* Check if VEC_PERM_EXPR within the given setting is supported
1095 by hardware, or lower it piecewise.
1096
1097 When VEC_PERM_EXPR has the same first and second operands:
1098 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1099 {v0[mask[0]], v0[mask[1]], ...}
1100 MASK and V0 must have the same number of elements.
1101
1102 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1103 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1104 V0 and V1 must have the same type. MASK, V0 and V1 must have the
1105 same number of elements. */
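/* For example, with four-element vectors,
     VEC_PERM_EXPR <{1, 2, 3, 4}, {5, 6, 7, 8}, {0, 5, 2, 7}>
   selects {1, 6, 3, 8}: mask values below 4 pick from the first vector,
   the rest pick element "mask & 3" from the second one.  */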
1106
1107 static void
1108 lower_vec_perm (gimple_stmt_iterator *gsi)
1109 {
1110 gimple stmt = gsi_stmt (*gsi);
1111 tree mask = gimple_assign_rhs3 (stmt);
1112 tree vec0 = gimple_assign_rhs1 (stmt);
1113 tree vec1 = gimple_assign_rhs2 (stmt);
1114 tree vect_type = TREE_TYPE (vec0);
1115 tree mask_type = TREE_TYPE (mask);
1116 tree vect_elt_type = TREE_TYPE (vect_type);
1117 tree mask_elt_type = TREE_TYPE (mask_type);
1118 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1119 VEC(constructor_elt,gc) *v;
1120 tree constr, t, si, i_val;
1121 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1122 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1123 location_t loc = gimple_location (gsi_stmt (*gsi));
1124 unsigned i;
1125
1126 if (TREE_CODE (mask) == SSA_NAME)
1127 {
1128 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1129 if (is_gimple_assign (def_stmt)
1130 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1131 mask = gimple_assign_rhs1 (def_stmt);
1132 }
1133
1134 if (TREE_CODE (mask) == VECTOR_CST)
1135 {
1136 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1137
1138 for (i = 0; i < elements; ++i)
1139 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1140 & (2 * elements - 1));
1141
1142 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1143 {
1144 gimple_assign_set_rhs3 (stmt, mask);
1145 update_stmt (stmt);
1146 return;
1147 }
1148 }
1149 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1150 return;
1151
1152 warning_at (loc, OPT_Wvector_operation_performance,
1153 "vector shuffling operation will be expanded piecewise");
1154
1155 v = VEC_alloc (constructor_elt, gc, elements);
1156 for (i = 0; i < elements; i++)
1157 {
1158 si = size_int (i);
1159 i_val = vector_element (gsi, mask, si, &masktmp);
1160
1161 if (TREE_CODE (i_val) == INTEGER_CST)
1162 {
1163 unsigned HOST_WIDE_INT index;
1164
1165 index = TREE_INT_CST_LOW (i_val);
1166 if (!host_integerp (i_val, 1) || index >= elements)
1167 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1168
1169 if (two_operand_p && (index & elements) != 0)
1170 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1171 else
1172 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1173
1174 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1175 true, GSI_SAME_STMT);
1176 }
1177 else
1178 {
1179 tree cond = NULL_TREE, v0_val;
1180
1181 if (two_operand_p)
1182 {
1183 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1184 build_int_cst (mask_elt_type, elements));
1185 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1186 true, GSI_SAME_STMT);
1187 }
1188
1189 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1190 build_int_cst (mask_elt_type, elements - 1));
1191 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1192 true, GSI_SAME_STMT);
1193
1194 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1195 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1196 true, GSI_SAME_STMT);
1197
1198 if (two_operand_p)
1199 {
1200 tree v1_val;
1201
1202 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1203 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1204 true, GSI_SAME_STMT);
1205
1206 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1207 cond, build_zero_cst (mask_elt_type));
1208 cond = fold_build3 (COND_EXPR, vect_elt_type,
1209 cond, v0_val, v1_val);
1210 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1211 true, GSI_SAME_STMT);
1212 }
1213 else
1214 t = v0_val;
1215 }
1216
1217 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1218 }
1219
1220 constr = build_constructor (vect_type, v);
1221 gimple_assign_set_rhs_from_tree (gsi, constr);
1222 update_stmt (gsi_stmt (*gsi));
1223 }
1224
1225 /* Process one statement. If we identify a vector operation, expand it. */
1226
1227 static void
1228 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1229 {
1230 gimple stmt = gsi_stmt (*gsi);
1231 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
1232 enum tree_code code;
1233 enum machine_mode compute_mode;
1234 optab op = unknown_optab;
1235 enum gimple_rhs_class rhs_class;
1236 tree new_rhs;
1237
1238 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1239 return;
1240
1241 code = gimple_assign_rhs_code (stmt);
1242 rhs_class = get_gimple_rhs_class (code);
1243 lhs = gimple_assign_lhs (stmt);
1244
1245 if (code == VEC_PERM_EXPR)
1246 {
1247 lower_vec_perm (gsi);
1248 return;
1249 }
1250
1251 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1252 return;
1253
1254 rhs1 = gimple_assign_rhs1 (stmt);
1255 type = gimple_expr_type (stmt);
1256 if (rhs_class == GIMPLE_BINARY_RHS)
1257 rhs2 = gimple_assign_rhs2 (stmt);
1258
1259 if (TREE_CODE (type) != VECTOR_TYPE)
1260 return;
1261
1262 if (code == NOP_EXPR
1263 || code == FLOAT_EXPR
1264 || code == FIX_TRUNC_EXPR
1265 || code == VIEW_CONVERT_EXPR)
1266 return;
1267
1268 gcc_assert (code != CONVERT_EXPR);
1269
1270 /* The signedness is determined from the input argument. */
1271 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1272 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1273 type = TREE_TYPE (rhs1);
1274
1275 /* For widening/narrowing vector operations, the relevant type is that of the
1276 arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1277 handled in the same way above. */
1278 if (code == WIDEN_SUM_EXPR
1279 || code == VEC_WIDEN_MULT_HI_EXPR
1280 || code == VEC_WIDEN_MULT_LO_EXPR
1281 || code == VEC_WIDEN_MULT_EVEN_EXPR
1282 || code == VEC_WIDEN_MULT_ODD_EXPR
1283 || code == VEC_UNPACK_HI_EXPR
1284 || code == VEC_UNPACK_LO_EXPR
1285 || code == VEC_PACK_TRUNC_EXPR
1286 || code == VEC_PACK_SAT_EXPR
1287 || code == VEC_PACK_FIX_TRUNC_EXPR
1288 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1289 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1290 type = TREE_TYPE (rhs1);
1291
1292 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1293 scalar. */
1294 if (code == LSHIFT_EXPR
1295 || code == RSHIFT_EXPR
1296 || code == LROTATE_EXPR
1297 || code == RROTATE_EXPR)
1298 {
1299 optab opv;
1300
1301 /* Check whether we have vector <op> {x,x,x,x} where x
1302 could be a scalar variable or a constant. Transform
1303 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1304 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1305 {
1306 tree first;
1307 gimple def_stmt;
1308
1309 if ((TREE_CODE (rhs2) == VECTOR_CST
1310 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1311 || (TREE_CODE (rhs2) == SSA_NAME
1312 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1313 && gimple_assign_single_p (def_stmt)
1314 && (first = uniform_vector_p
1315 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1316 {
1317 gimple_assign_set_rhs2 (stmt, first);
1318 update_stmt (stmt);
1319 rhs2 = first;
1320 }
1321 }
1322
1323 opv = optab_for_tree_code (code, type, optab_vector);
1324 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1325 op = opv;
1326 else
1327 {
1328 op = optab_for_tree_code (code, type, optab_scalar);
1329
1330 /* The rtl expander will expand vector/scalar as vector/vector
1331 if necessary. Don't bother converting the stmt here. */
1332 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1333 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1334 return;
1335 }
1336 }
1337 else
1338 op = optab_for_tree_code (code, type, optab_default);
1339
1340 /* Optabs will try converting a negation into a subtraction, so
1341 look for it as well. TODO: negation of floating-point vectors
1342 might be turned into an exclusive OR toggling the sign bit. */
1343 if (op == unknown_optab
1344 && code == NEGATE_EXPR
1345 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1346 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1347
1348 /* For very wide vectors, try using a smaller vector mode. */
1349 compute_type = type;
1350 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
1351 {
1352 tree vector_compute_type
1353 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1354 if (vector_compute_type != NULL_TREE
1355 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1356 < TYPE_VECTOR_SUBPARTS (compute_type))
1357 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1358 != CODE_FOR_nothing))
1359 compute_type = vector_compute_type;
1360 }
1361
1362 /* If we are breaking a BLKmode vector into smaller pieces,
1363 type_for_widest_vector_mode has already looked into the optab,
1364 so skip these checks. */
1365 if (compute_type == type)
1366 {
1367 compute_mode = TYPE_MODE (compute_type);
1368 if (VECTOR_MODE_P (compute_mode))
1369 {
1370 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1371 return;
1372 if (code == MULT_HIGHPART_EXPR
1373 && can_mult_highpart_p (compute_mode,
1374 TYPE_UNSIGNED (compute_type)))
1375 return;
1376 }
1377 /* There is no operation in hardware, so fall back to scalars. */
1378 compute_type = TREE_TYPE (type);
1379 }
1380
1381 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1382 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1383
1384 /* Leave expression untouched for later expansion. */
1385 if (new_rhs == NULL_TREE)
1386 return;
1387
1388 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1389 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1390 new_rhs);
1391
1392 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1393 way to do it is to change expand_vector_operation and its callees to
1394 return a tree_code, RHS1 and RHS2 instead of a tree. */
1395 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1396 update_stmt (gsi_stmt (*gsi));
1397 }
1398 \f
1399 /* Use this to lower vector operations introduced by the vectorizer,
1400 if it may need the bit-twiddling tricks implemented in this file. */
1401
1402 static bool
1403 gate_expand_vector_operations_ssa (void)
1404 {
1405 return optimize == 0;
1406 }
1407
1408 static unsigned int
1409 expand_vector_operations (void)
1410 {
1411 gimple_stmt_iterator gsi;
1412 basic_block bb;
1413 bool cfg_changed = false;
1414
1415 FOR_EACH_BB (bb)
1416 {
1417 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1418 {
1419 expand_vector_operations_1 (&gsi);
1420 /* ??? If we do not clean up EH then we will ICE in
1421 verification. But in reality we have created wrong code,
1422 as we did not properly transition EH info and edges to
1423 the piecewise computations. */
1424 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1425 && gimple_purge_dead_eh_edges (bb))
1426 cfg_changed = true;
1427 }
1428 }
1429
1430 return cfg_changed ? TODO_cleanup_cfg : 0;
1431 }
1432
1433 struct gimple_opt_pass pass_lower_vector =
1434 {
1435 {
1436 GIMPLE_PASS,
1437 "veclower", /* name */
1438 OPTGROUP_VEC, /* optinfo_flags */
1439 gate_expand_vector_operations_ssa, /* gate */
1440 expand_vector_operations, /* execute */
1441 NULL, /* sub */
1442 NULL, /* next */
1443 0, /* static_pass_number */
1444 TV_NONE, /* tv_id */
1445 PROP_cfg, /* properties_required */
1446 0, /* properties_provided */
1447 0, /* properties_destroyed */
1448 0, /* todo_flags_start */
1449 TODO_update_ssa /* todo_flags_finish */
1450 | TODO_verify_ssa
1451 | TODO_verify_stmts | TODO_verify_flow
1452 | TODO_cleanup_cfg
1453 }
1454 };
1455
1456 struct gimple_opt_pass pass_lower_vector_ssa =
1457 {
1458 {
1459 GIMPLE_PASS,
1460 "veclower2", /* name */
1461 OPTGROUP_VEC, /* optinfo_flags */
1462 0, /* gate */
1463 expand_vector_operations, /* execute */
1464 NULL, /* sub */
1465 NULL, /* next */
1466 0, /* static_pass_number */
1467 TV_NONE, /* tv_id */
1468 PROP_cfg, /* properties_required */
1469 0, /* properties_provided */
1470 0, /* properties_destroyed */
1471 0, /* todo_flags_start */
1472 TODO_update_ssa /* todo_flags_finish */
1473 | TODO_verify_ssa
1474 | TODO_verify_stmts | TODO_verify_flow
1475 | TODO_cleanup_cfg
1476 }
1477 };
1478
1479 #include "gt-tree-vect-generic.h"