1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "tm.h"
25 #include "langhooks.h"
26 #include "gimple.h"
27 #include "gimple-iterator.h"
28 #include "gimplify-me.h"
29 #include "gimple-ssa.h"
30 #include "tree-cfg.h"
31 #include "tree-ssanames.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "flags.h"
35 #include "ggc.h"
36 #include "diagnostic.h"
37 #include "target.h"
38
39 /* Need to include rtl.h, expr.h, etc. for optabs. */
40 #include "expr.h"
41 #include "optabs.h"
42
43
44 static void expand_vector_operations_1 (gimple_stmt_iterator *);
45
46
47 /* Build a constant of type TYPE, made of VALUE's bits replicated
48 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
49 static tree
50 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
51 {
52 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
53 int n = HOST_BITS_PER_WIDE_INT / width;
54 unsigned HOST_WIDE_INT low, high, mask;
55 tree ret;
56
57 gcc_assert (n);
58
59 if (width == HOST_BITS_PER_WIDE_INT)
60 low = value;
61 else
62 {
63 mask = ((HOST_WIDE_INT)1 << width) - 1;
64 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
65 }
66
67 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
68 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
69 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
70 high = 0;
71 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
72 high = low;
73 else
74 gcc_unreachable ();
75
76 ret = build_int_cst_wide (type, low, high);
77 return ret;
78 }
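/* For example, build_replicated_const called with a 32-bit TYPE, an 8-bit
   INNER_TYPE and VALUE 0x7f yields the integer constant 0x7f7f7f7f, i.e.
   VALUE's low eight bits replicated into every byte of TYPE.  */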
79
80 static GTY(()) tree vector_inner_type;
81 static GTY(()) tree vector_last_type;
82 static GTY(()) int vector_last_nunits;
83
 84 /* Return a suitable vector type made of NUNITS units each of mode
 85    "word_mode" (the global variable).  */
86 static tree
87 build_word_mode_vector_type (int nunits)
88 {
89 if (!vector_inner_type)
90 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
91 else if (vector_last_nunits == nunits)
92 {
93 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
94 return vector_last_type;
95 }
96
97 /* We build a new type, but we canonicalize it nevertheless,
98 because it still saves some memory. */
99 vector_last_nunits = nunits;
100 vector_last_type = type_hash_canon (nunits,
101 build_vector_type (vector_inner_type,
102 nunits));
103 return vector_last_type;
104 }
105
106 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
107 tree, tree, tree, tree, tree, enum tree_code);
108
109 static inline tree
110 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
111 tree t, tree bitsize, tree bitpos)
112 {
113 if (bitpos)
114 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
115 else
116 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
117 }
118
119 static tree
120 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
121 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
122 enum tree_code code)
123 {
124 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
125 return gimplify_build1 (gsi, code, inner_type, a);
126 }
127
128 static tree
129 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
130 tree bitpos, tree bitsize, enum tree_code code)
131 {
132 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
133 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
134 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
135 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
136 return gimplify_build2 (gsi, code, inner_type, a, b);
137 }
138
139 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
140
141    INNER_TYPE is the type of A's and B's elements.
142
143    The returned expression is of signed integer type with the
144    same size as INNER_TYPE.  */
145 static tree
146 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
147 tree bitpos, tree bitsize, enum tree_code code)
148 {
149 tree comp_type;
150
151 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
152 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
153
154 comp_type = build_nonstandard_integer_type
155 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
156
157 return gimplify_build3 (gsi, COND_EXPR, comp_type,
158 fold_build2 (code, boolean_type_node, a, b),
159 build_int_cst (comp_type, -1),
160 build_int_cst (comp_type, 0));
161 }
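/* For instance, comparing two V4SF vectors lane by lane with LT_EXPR
   produces, for each lane, the scalar expression (a[i] < b[i]) ? -1 : 0
   computed in a 32-bit signed integer type, mimicking the all-ones /
   all-zeros lanes a hardware vector comparison would produce.  */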
162
163 /* Expand vector addition to scalars. This does bit twiddling
164 in order to increase parallelism:
165
166 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
167 (a ^ b) & 0x80808080
168
169 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
170 (a ^ ~b) & 0x80808080
171
172 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
173
174 This optimization should be done only if 4 vector items or more
175 fit into a word. */
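/* A quick sanity check of the addition identity above, using two 8-bit
   elements packed into a 16-bit word and the 16-bit analogues of the
   masks (illustrative only): for a = 0x01ff and b = 0x0101 the expected
   per-element result is {0x01 + 0x01, 0xff + 0x01} = 0x0200, and indeed

     ((0x01ff & 0x7f7f) + (0x0101 & 0x7f7f)) ^ ((0x01ff ^ 0x0101) & 0x8080)
     = (0x017f + 0x0101) ^ (0x00fe & 0x8080)
     = 0x0280 ^ 0x0080
     = 0x0200,

   with no carry leaking from the low element into the high one.  */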
176 static tree
177 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
178 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
179 enum tree_code code)
180 {
181 tree inner_type = TREE_TYPE (TREE_TYPE (a));
182 unsigned HOST_WIDE_INT max;
183 tree low_bits, high_bits, a_low, b_low, result_low, signs;
184
185 max = GET_MODE_MASK (TYPE_MODE (inner_type));
186 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
187 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
188
189 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
190 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
191
192 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
193 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
194 if (code == PLUS_EXPR)
195 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
196 else
197 {
198 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
199 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
200 }
201
202 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
203 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
204 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
205 }
206
207 static tree
208 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
209 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
210 tree bitsize ATTRIBUTE_UNUSED,
211 enum tree_code code ATTRIBUTE_UNUSED)
212 {
213 tree inner_type = TREE_TYPE (TREE_TYPE (b));
214 HOST_WIDE_INT max;
215 tree low_bits, high_bits, b_low, result_low, signs;
216
217 max = GET_MODE_MASK (TYPE_MODE (inner_type));
218 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
219 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
220
221 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
222
223 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
224 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
225 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
226 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
227 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
228 }
229
230 /* Expand a vector operation to scalars, by using many operations
231 whose type is the vector type's inner type. */
232 static tree
233 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
234 tree type, tree inner_type,
235 tree a, tree b, enum tree_code code)
236 {
237 vec<constructor_elt, va_gc> *v;
238 tree part_width = TYPE_SIZE (inner_type);
239 tree index = bitsize_int (0);
240 int nunits = TYPE_VECTOR_SUBPARTS (type);
241 int delta = tree_low_cst (part_width, 1)
242 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
243 int i;
244 location_t loc = gimple_location (gsi_stmt (*gsi));
245
246 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
247 warning_at (loc, OPT_Wvector_operation_performance,
248 "vector operation will be expanded piecewise");
249 else
250 warning_at (loc, OPT_Wvector_operation_performance,
251 "vector operation will be expanded in parallel");
252
253 vec_alloc (v, (nunits + delta - 1) / delta);
254 for (i = 0; i < nunits;
255 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
256 {
257 tree result = f (gsi, inner_type, a, b, index, part_width, code);
258 constructor_elt ce = {NULL_TREE, result};
259 v->quick_push (ce);
260 }
261
262 return build_constructor (type, v);
263 }
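/* For example, lowering a V4SI addition this way calls the element
   function four times, extracting 32-bit pieces of A and B with
   BIT_FIELD_REFs at bit positions 0, 32, 64 and 96, and collects the
   four scalar sums into a CONSTRUCTOR of the original vector type.  */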
264
265 /* Expand a vector operation to scalars with the freedom to use
266 a scalar integer type, or to use a different size for the items
267 in the vector type. */
268 static tree
269 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
270 tree a, tree b,
271 enum tree_code code)
272 {
273 tree result, compute_type;
274 enum machine_mode mode;
275 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
276 location_t loc = gimple_location (gsi_stmt (*gsi));
277
278 /* We have three strategies. If the type is already correct, just do
279 the operation an element at a time. Else, if the vector is wider than
280 one word, do it a word at a time; finally, if the vector is smaller
281 than one word, do it as a scalar. */
282 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
283 return expand_vector_piecewise (gsi, f,
284 type, TREE_TYPE (type),
285 a, b, code);
286 else if (n_words > 1)
287 {
288 tree word_type = build_word_mode_vector_type (n_words);
289 result = expand_vector_piecewise (gsi, f,
290 word_type, TREE_TYPE (word_type),
291 a, b, code);
292 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
293 GSI_SAME_STMT);
294 }
295 else
296 {
297 /* Use a single scalar operation with a mode no wider than word_mode. */
298 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
299 compute_type = lang_hooks.types.type_for_mode (mode, 1);
300 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
301 warning_at (loc, OPT_Wvector_operation_performance,
302 "vector operation will be expanded with a "
303 "single scalar operation");
304 }
305
306 return result;
307 }
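/* As an illustration of the three strategies above, on a 64-bit target
   with no usable vector instruction: a V2DI bitwise operation is done one
   word-sized element at a time; a 16-byte V16QI operation is done in two
   word-sized chunks using the word-mode vector type built above; and a
   4-byte V4QI operation is done as a single SImode scalar operation.  */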
308
309 /* Expand a vector operation to scalars; for integer types we can use
310 special bit twiddling tricks to do the sums a word at a time, using
311 function F_PARALLEL instead of F. These tricks are done only if
312 they can process at least four items, that is, only if the vector
313 holds at least four items and if a word can hold four items. */
314 static tree
315 expand_vector_addition (gimple_stmt_iterator *gsi,
316 elem_op_func f, elem_op_func f_parallel,
317 tree type, tree a, tree b, enum tree_code code)
318 {
319 int parts_per_word = UNITS_PER_WORD
320 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
321
322 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
323 && parts_per_word >= 4
324 && TYPE_VECTOR_SUBPARTS (type) >= 4)
325 return expand_vector_parallel (gsi, f_parallel,
326 type, a, b, code);
327 else
328 return expand_vector_piecewise (gsi, f,
329 type, TREE_TYPE (type),
330 a, b, code);
331 }
332
333 /* Try to expand the vector comparison expression OP0 CODE OP1 by
334    querying the optab to check whether the expression
335    VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
336    can be expanded.  */
337 static tree
338 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
339 tree op1, enum tree_code code)
340 {
341 tree t;
342 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
343 t = expand_vector_piecewise (gsi, do_compare, type,
344 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
345 else
346 t = NULL_TREE;
347
348 return t;
349 }
350
351 /* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in the
352    type of OP0, with shift counts in the SHIFTCNTS array, and return the
353    temporary holding the result if successful, otherwise return NULL_TREE.  */
354 static tree
355 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
356 {
357 optab op;
358 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
359 bool scalar_shift = true;
360
361 for (i = 1; i < nunits; i++)
362 {
363 if (shiftcnts[i] != shiftcnts[0])
364 scalar_shift = false;
365 }
366
367 if (scalar_shift && shiftcnts[0] == 0)
368 return op0;
369
370 if (scalar_shift)
371 {
372 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
373 if (op != unknown_optab
374 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
375 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
376 build_int_cst (NULL_TREE, shiftcnts[0]));
377 }
378
379 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
380 if (op != unknown_optab
381 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
382 {
383 tree *vec = XALLOCAVEC (tree, nunits);
384 for (i = 0; i < nunits; i++)
385 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
386 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
387 build_vector (type, vec));
388 }
389
390 return NULL_TREE;
391 }
392
393 /* Try to expand an integer vector division by a constant using
394    widening multiplication, shifts and additions.  */
395 static tree
396 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
397 tree op1, enum tree_code code)
398 {
399 bool use_pow2 = true;
400 bool has_vector_shift = true;
401 int mode = -1, this_mode;
402 int pre_shift = -1, post_shift;
403 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
404 int *shifts = XALLOCAVEC (int, nunits * 4);
405 int *pre_shifts = shifts + nunits;
406 int *post_shifts = pre_shifts + nunits;
407 int *shift_temps = post_shifts + nunits;
408 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
409 int prec = TYPE_PRECISION (TREE_TYPE (type));
410 int dummy_int;
411 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
412 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
413 tree *vec;
414 tree cur_op, mulcst, tem;
415 optab op;
416
417 if (prec > HOST_BITS_PER_WIDE_INT)
418 return NULL_TREE;
419
420 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
421 if (op == unknown_optab
422 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
423 has_vector_shift = false;
424
425   /* Analysis phase.  Determine whether all op1 elements are powers of two,
426      in which case it is possible to expand the division using shifts (or,
427      for the remainder, using masking).  Additionally compute the
428      multiplicative constants and pre- and post-shifts if the division is to
429      be expanded using widening or high part multiplication plus shifts.  */
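  /* The chosen strategy is recorded in MODE; this summary is derived from
     the switches near the end of the function: -2 means give up, 0 and 1
     are the unsigned highpart-multiply sequences (1 needing the extra
     subtract/halve/add fixup because the multiplier does not fit in PREC
     bits), and 2 to 5 are the signed highpart-multiply sequences, where
     bit 0 records a negative divisor and modes 4 and 5 add OP0 back in
     because the multiplier was sign-adjusted.  */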
430 for (i = 0; i < nunits; i++)
431 {
432 tree cst = VECTOR_CST_ELT (op1, i);
433 unsigned HOST_WIDE_INT ml;
434
435 if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
436 return NULL_TREE;
437 pre_shifts[i] = 0;
438 post_shifts[i] = 0;
439 mulc[i] = 0;
440 if (use_pow2
441 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
442 use_pow2 = false;
443 if (use_pow2)
444 {
445 shifts[i] = tree_log2 (cst);
446 if (shifts[i] != shifts[0]
447 && code == TRUNC_DIV_EXPR
448 && !has_vector_shift)
449 use_pow2 = false;
450 }
451 if (mode == -2)
452 continue;
453 if (unsignedp)
454 {
455 unsigned HOST_WIDE_INT mh;
456 unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
457
458 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
459 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
460 return NULL_TREE;
461
462 if (d <= 1)
463 {
464 mode = -2;
465 continue;
466 }
467
468 /* Find a suitable multiplier and right shift count
469 instead of multiplying with D. */
470 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
471
472       /* If the suggested multiplier is more than PREC bits, we can
473          do better for even divisors, using an initial right shift.  */
474 if ((mh != 0 && (d & 1) == 0)
475 || (!has_vector_shift && pre_shift != -1))
476 {
477 if (has_vector_shift)
478 pre_shift = floor_log2 (d & -d);
479 else if (pre_shift == -1)
480 {
481 unsigned int j;
482 for (j = 0; j < nunits; j++)
483 {
484 tree cst2 = VECTOR_CST_ELT (op1, j);
485 unsigned HOST_WIDE_INT d2;
486 int this_pre_shift;
487
488 if (!host_integerp (cst2, 1))
489 return NULL_TREE;
490 d2 = tree_low_cst (cst2, 1) & mask;
491 if (d2 == 0)
492 return NULL_TREE;
493 this_pre_shift = floor_log2 (d2 & -d2);
494 if (pre_shift == -1 || this_pre_shift < pre_shift)
495 pre_shift = this_pre_shift;
496 }
497 if (i != 0 && pre_shift != 0)
498 {
499 /* Restart. */
500 i = -1U;
501 mode = -1;
502 continue;
503 }
504 }
505 if (pre_shift != 0)
506 {
507 if ((d >> pre_shift) <= 1)
508 {
509 mode = -2;
510 continue;
511 }
512 mh = choose_multiplier (d >> pre_shift, prec,
513 prec - pre_shift,
514 &ml, &post_shift, &dummy_int);
515 gcc_assert (!mh);
516 pre_shifts[i] = pre_shift;
517 }
518 }
519 if (!mh)
520 this_mode = 0;
521 else
522 this_mode = 1;
523 }
524 else
525 {
526 HOST_WIDE_INT d = tree_low_cst (cst, 0);
527 unsigned HOST_WIDE_INT abs_d;
528
529 if (d == -1)
530 return NULL_TREE;
531
532 /* Since d might be INT_MIN, we have to cast to
533 unsigned HOST_WIDE_INT before negating to avoid
534 undefined signed overflow. */
535 abs_d = (d >= 0
536 ? (unsigned HOST_WIDE_INT) d
537 : - (unsigned HOST_WIDE_INT) d);
538
539 /* n rem d = n rem -d */
540 if (code == TRUNC_MOD_EXPR && d < 0)
541 d = abs_d;
542 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
543 {
544 /* This case is not handled correctly below. */
545 mode = -2;
546 continue;
547 }
548 if (abs_d <= 1)
549 {
550 mode = -2;
551 continue;
552 }
553
554 choose_multiplier (abs_d, prec, prec - 1, &ml,
555 &post_shift, &dummy_int);
556 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
557 {
558 this_mode = 4 + (d < 0);
559 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
560 }
561 else
562 this_mode = 2 + (d < 0);
563 }
564 mulc[i] = ml;
565 post_shifts[i] = post_shift;
566 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
567 || post_shift >= prec
568 || pre_shifts[i] >= prec)
569 this_mode = -2;
570
571 if (i == 0)
572 mode = this_mode;
573 else if (mode != this_mode)
574 mode = -2;
575 }
576
577 vec = XALLOCAVEC (tree, nunits);
578
579 if (use_pow2)
580 {
581 tree addend = NULL_TREE;
582 if (!unsignedp)
583 {
584 tree uns_type;
585
586       /* Both the division and the remainder sequences need
587          op0 < 0 ? mask : 0 computed.  It can be computed either as
588          (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
589          if none of the shifts is 0, or as the conditional.  */
590 for (i = 0; i < nunits; i++)
591 if (shifts[i] == 0)
592 break;
593 uns_type
594 = build_vector_type (build_nonstandard_integer_type (prec, 1),
595 nunits);
596 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
597 {
598 for (i = 0; i < nunits; i++)
599 shift_temps[i] = prec - 1;
600 cur_op = add_rshift (gsi, type, op0, shift_temps);
601 if (cur_op != NULL_TREE)
602 {
603 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
604 uns_type, cur_op);
605 for (i = 0; i < nunits; i++)
606 shift_temps[i] = prec - shifts[i];
607 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
608 if (cur_op != NULL_TREE)
609 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
610 type, cur_op);
611 }
612 }
613 if (addend == NULL_TREE
614 && expand_vec_cond_expr_p (type, type))
615 {
616 tree zero, cst, cond;
617 gimple stmt;
618
619 zero = build_zero_cst (type);
620 cond = build2 (LT_EXPR, type, op0, zero);
621 for (i = 0; i < nunits; i++)
622 vec[i] = build_int_cst (TREE_TYPE (type),
623 ((unsigned HOST_WIDE_INT) 1
624 << shifts[i]) - 1);
625 cst = build_vector (type, vec);
626 addend = make_ssa_name (type, NULL);
627 stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
628 cond, cst, zero);
629 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
630 }
631 }
632 if (code == TRUNC_DIV_EXPR)
633 {
634 if (unsignedp)
635 {
636 /* q = op0 >> shift; */
637 cur_op = add_rshift (gsi, type, op0, shifts);
638 if (cur_op != NULL_TREE)
639 return cur_op;
640 }
641 else if (addend != NULL_TREE)
642 {
643 /* t1 = op0 + addend;
644 q = t1 >> shift; */
645 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
646 if (op != unknown_optab
647 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
648 {
649 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
650 cur_op = add_rshift (gsi, type, cur_op, shifts);
651 if (cur_op != NULL_TREE)
652 return cur_op;
653 }
654 }
655 }
656 else
657 {
658 tree mask;
659 for (i = 0; i < nunits; i++)
660 vec[i] = build_int_cst (TREE_TYPE (type),
661 ((unsigned HOST_WIDE_INT) 1
662 << shifts[i]) - 1);
663 mask = build_vector (type, vec);
664 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
665 if (op != unknown_optab
666 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
667 {
668 if (unsignedp)
669 /* r = op0 & mask; */
670 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
671 else if (addend != NULL_TREE)
672 {
673 /* t1 = op0 + addend;
674 t2 = t1 & mask;
675 r = t2 - addend; */
676 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
677 if (op != unknown_optab
678 && optab_handler (op, TYPE_MODE (type))
679 != CODE_FOR_nothing)
680 {
681 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
682 addend);
683 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
684 cur_op, mask);
685 op = optab_for_tree_code (MINUS_EXPR, type,
686 optab_default);
687 if (op != unknown_optab
688 && optab_handler (op, TYPE_MODE (type))
689 != CODE_FOR_nothing)
690 return gimplify_build2 (gsi, MINUS_EXPR, type,
691 cur_op, addend);
692 }
693 }
694 }
695 }
696 }
697
698 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
699 return NULL_TREE;
700
701 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
702 return NULL_TREE;
703
704 cur_op = op0;
705
706 switch (mode)
707 {
708 case 0:
709 gcc_assert (unsignedp);
710 /* t1 = oprnd0 >> pre_shift;
711 t2 = t1 h* ml;
712 q = t2 >> post_shift; */
713 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
714 if (cur_op == NULL_TREE)
715 return NULL_TREE;
716 break;
717 case 1:
718 gcc_assert (unsignedp);
719 for (i = 0; i < nunits; i++)
720 {
721 shift_temps[i] = 1;
722 post_shifts[i]--;
723 }
724 break;
725 case 2:
726 case 3:
727 case 4:
728 case 5:
729 gcc_assert (!unsignedp);
730 for (i = 0; i < nunits; i++)
731 shift_temps[i] = prec - 1;
732 break;
733 default:
734 return NULL_TREE;
735 }
736
737 for (i = 0; i < nunits; i++)
738 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
739 mulcst = build_vector (type, vec);
740
741 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
742
743 switch (mode)
744 {
745 case 0:
746 /* t1 = oprnd0 >> pre_shift;
747 t2 = t1 h* ml;
748 q = t2 >> post_shift; */
749 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
750 break;
751 case 1:
752 /* t1 = oprnd0 h* ml;
753 t2 = oprnd0 - t1;
754 t3 = t2 >> 1;
755 t4 = t1 + t3;
756 q = t4 >> (post_shift - 1); */
757 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
758 if (op == unknown_optab
759 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
760 return NULL_TREE;
761 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
762 tem = add_rshift (gsi, type, tem, shift_temps);
763 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
764 if (op == unknown_optab
765 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
766 return NULL_TREE;
767 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
768 cur_op = add_rshift (gsi, type, tem, post_shifts);
769 if (cur_op == NULL_TREE)
770 return NULL_TREE;
771 break;
772 case 2:
773 case 3:
774 case 4:
775 case 5:
776 /* t1 = oprnd0 h* ml;
777 t2 = t1; [ iff (mode & 2) != 0 ]
778 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
779 t3 = t2 >> post_shift;
780 t4 = oprnd0 >> (prec - 1);
781 q = t3 - t4; [ iff (mode & 1) == 0 ]
782 q = t4 - t3; [ iff (mode & 1) != 0 ] */
783 if ((mode & 2) == 0)
784 {
785 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
786 if (op == unknown_optab
787 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
788 return NULL_TREE;
789 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
790 }
791 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
792 if (cur_op == NULL_TREE)
793 return NULL_TREE;
794 tem = add_rshift (gsi, type, op0, shift_temps);
795 if (tem == NULL_TREE)
796 return NULL_TREE;
797 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
798 if (op == unknown_optab
799 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
800 return NULL_TREE;
801 if ((mode & 1) == 0)
802 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
803 else
804 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
805 break;
806 default:
807 gcc_unreachable ();
808 }
809
810 if (code == TRUNC_DIV_EXPR)
811 return cur_op;
812
813 /* We divided. Now finish by:
814 t1 = q * oprnd1;
815 r = oprnd0 - t1; */
816 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
817 if (op == unknown_optab
818 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
819 return NULL_TREE;
820 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
821 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
822 if (op == unknown_optab
823 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
824 return NULL_TREE;
825 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
826 }
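/* As a concrete example of the power-of-two path above, a signed V4SI
   division by {4, 4, 4, 4} is lowered to roughly
       addend = op0 < {0,...} ? {3,...} : {0,...};
       q = (op0 + addend) >> 2;
   and the corresponding TRUNC_MOD_EXPR to
       t = (op0 + addend) & 3;
       r = t - addend;
   while an unsigned division by a power of two is simply a logical
   right shift.  */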
827
828 /* Expand a vector condition to scalars, by using many conditions
829 on the vector's elements. */
830 static void
831 expand_vector_condition (gimple_stmt_iterator *gsi)
832 {
833 gimple stmt = gsi_stmt (*gsi);
834 tree type = gimple_expr_type (stmt);
835 tree a = gimple_assign_rhs1 (stmt);
836 tree a1 = a;
837 tree a2;
838 bool a_is_comparison = false;
839 tree b = gimple_assign_rhs2 (stmt);
840 tree c = gimple_assign_rhs3 (stmt);
841 vec<constructor_elt, va_gc> *v;
842 tree constr;
843 tree inner_type = TREE_TYPE (type);
844 tree cond_type = TREE_TYPE (TREE_TYPE (a));
845 tree comp_inner_type = cond_type;
846 tree width = TYPE_SIZE (inner_type);
847 tree index = bitsize_int (0);
848 int nunits = TYPE_VECTOR_SUBPARTS (type);
849 int i;
850 location_t loc = gimple_location (gsi_stmt (*gsi));
851
852 if (!is_gimple_val (a))
853 {
854 gcc_assert (COMPARISON_CLASS_P (a));
855 a_is_comparison = true;
856 a1 = TREE_OPERAND (a, 0);
857 a2 = TREE_OPERAND (a, 1);
858 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
859 }
860
861 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
862 return;
863
864   /* TODO: try to find a smaller vector type.  */
865
866 warning_at (loc, OPT_Wvector_operation_performance,
867 "vector condition will be expanded piecewise");
868
869 vec_alloc (v, nunits);
870 for (i = 0; i < nunits;
871 i++, index = int_const_binop (PLUS_EXPR, index, width))
872 {
873 tree aa, result;
874 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
875 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
876 if (a_is_comparison)
877 {
878 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
879 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
880 aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
881 }
882 else
883 aa = tree_vec_extract (gsi, cond_type, a, width, index);
884 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
885 constructor_elt ce = {NULL_TREE, result};
886 v->quick_push (ce);
887 }
888
889 constr = build_constructor (type, v);
890 gimple_assign_set_rhs_from_tree (gsi, constr);
891 update_stmt (gsi_stmt (*gsi));
892 }
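/* For instance, if the target cannot expand a V4SI VEC_COND_EXPR
   <a < b, c, d> directly, the function above turns it into a CONSTRUCTOR
   of four scalar COND_EXPRs, each of the form a[i] < b[i] ? c[i] : d[i]
   on BIT_FIELD_REF-extracted elements.  */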
893
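/* Expand the vector operation performed by ASSIGN, whose result has
   vector type TYPE, into operations on COMPUTE_TYPE pieces.  CODE is the
   tree code of the operation.  The special-cased expansions (parallel
   bit twiddling, piecewise comparisons, division by vector constants)
   are tried first; otherwise the operation is expanded piecewise.  */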
894 static tree
895 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
896 gimple assign, enum tree_code code)
897 {
898 enum machine_mode compute_mode = TYPE_MODE (compute_type);
899
900 /* If the compute mode is not a vector mode (hence we are not decomposing
901 a BLKmode vector to smaller, hardware-supported vectors), we may want
902 to expand the operations in parallel. */
903 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
904 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
905 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
906 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
907 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
908 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
909 switch (code)
910 {
911 case PLUS_EXPR:
912 case MINUS_EXPR:
913 if (!TYPE_OVERFLOW_TRAPS (type))
914 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
915 gimple_assign_rhs1 (assign),
916 gimple_assign_rhs2 (assign), code);
917 break;
918
919 case NEGATE_EXPR:
920 if (!TYPE_OVERFLOW_TRAPS (type))
921 return expand_vector_addition (gsi, do_unop, do_negate, type,
922 gimple_assign_rhs1 (assign),
923 NULL_TREE, code);
924 break;
925
926 case BIT_AND_EXPR:
927 case BIT_IOR_EXPR:
928 case BIT_XOR_EXPR:
929 return expand_vector_parallel (gsi, do_binop, type,
930 gimple_assign_rhs1 (assign),
931 gimple_assign_rhs2 (assign), code);
932
933 case BIT_NOT_EXPR:
934 return expand_vector_parallel (gsi, do_unop, type,
935 gimple_assign_rhs1 (assign),
936 NULL_TREE, code);
937 case EQ_EXPR:
938 case NE_EXPR:
939 case GT_EXPR:
940 case LT_EXPR:
941 case GE_EXPR:
942 case LE_EXPR:
943 case UNEQ_EXPR:
944 case UNGT_EXPR:
945 case UNLT_EXPR:
946 case UNGE_EXPR:
947 case UNLE_EXPR:
948 case LTGT_EXPR:
949 case ORDERED_EXPR:
950 case UNORDERED_EXPR:
951 {
952 tree rhs1 = gimple_assign_rhs1 (assign);
953 tree rhs2 = gimple_assign_rhs2 (assign);
954
955 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
956 }
957
958 case TRUNC_DIV_EXPR:
959 case TRUNC_MOD_EXPR:
960 {
961 tree rhs1 = gimple_assign_rhs1 (assign);
962 tree rhs2 = gimple_assign_rhs2 (assign);
963 tree ret;
964
965 if (!optimize
966 || !VECTOR_INTEGER_TYPE_P (type)
967 || TREE_CODE (rhs2) != VECTOR_CST)
968 break;
969
970 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
971 if (ret != NULL_TREE)
972 return ret;
973 break;
974 }
975
976 default:
977 break;
978 }
979
980 if (TREE_CODE_CLASS (code) == tcc_unary)
981 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
982 gimple_assign_rhs1 (assign),
983 NULL_TREE, code);
984 else
985 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
986 gimple_assign_rhs1 (assign),
987 gimple_assign_rhs2 (assign), code);
988 }
989 \f
990 /* Return a type for the widest vector mode whose components are of type
991 TYPE, or NULL_TREE if none is found. */
992
993 static tree
994 type_for_widest_vector_mode (tree type, optab op)
995 {
996 enum machine_mode inner_mode = TYPE_MODE (type);
997 enum machine_mode best_mode = VOIDmode, mode;
998 int best_nunits = 0;
999
1000 if (SCALAR_FLOAT_MODE_P (inner_mode))
1001 mode = MIN_MODE_VECTOR_FLOAT;
1002 else if (SCALAR_FRACT_MODE_P (inner_mode))
1003 mode = MIN_MODE_VECTOR_FRACT;
1004 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1005 mode = MIN_MODE_VECTOR_UFRACT;
1006 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1007 mode = MIN_MODE_VECTOR_ACCUM;
1008 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1009 mode = MIN_MODE_VECTOR_UACCUM;
1010 else
1011 mode = MIN_MODE_VECTOR_INT;
1012
1013 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1014 if (GET_MODE_INNER (mode) == inner_mode
1015 && GET_MODE_NUNITS (mode) > best_nunits
1016 && optab_handler (op, mode) != CODE_FOR_nothing)
1017 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1018
1019 if (best_mode == VOIDmode)
1020 return NULL_TREE;
1021 else
1022 return build_vector_type_for_mode (type, best_mode);
1023 }
1024
1025
1026 /* Build a reference to the element of the vector VECT.  The function
1027    returns either the element itself, a BIT_FIELD_REF, or an
1028    ARRAY_REF expression.
1029
1030    GSI is required to insert temporary variables while building a
1031    reference to the element of the vector VECT.
1032
1033    PTMPVEC is a pointer to a temporary variable used for caching
1034    purposes.  If PTMPVEC is NULL, a new temporary variable will be
1035    created.  */
1036 static tree
1037 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1038 {
1039 tree vect_type, vect_elt_type;
1040 gimple asgn;
1041 tree tmpvec;
1042 tree arraytype;
1043 bool need_asgn = true;
1044 unsigned int elements;
1045
1046 vect_type = TREE_TYPE (vect);
1047 vect_elt_type = TREE_TYPE (vect_type);
1048 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1049
1050 if (TREE_CODE (idx) == INTEGER_CST)
1051 {
1052 unsigned HOST_WIDE_INT index;
1053
1054 /* Given that we're about to compute a binary modulus,
1055 we don't care about the high bits of the value. */
1056 index = TREE_INT_CST_LOW (idx);
1057 if (!host_integerp (idx, 1) || index >= elements)
1058 {
1059 index &= elements - 1;
1060 idx = build_int_cst (TREE_TYPE (idx), index);
1061 }
1062
1063 /* When lowering a vector statement sequence do some easy
1064 simplification by looking through intermediate vector results. */
1065 if (TREE_CODE (vect) == SSA_NAME)
1066 {
1067 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1068 if (is_gimple_assign (def_stmt)
1069 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1070 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1071 vect = gimple_assign_rhs1 (def_stmt);
1072 }
1073
1074 if (TREE_CODE (vect) == VECTOR_CST)
1075 return VECTOR_CST_ELT (vect, index);
1076 else if (TREE_CODE (vect) == CONSTRUCTOR
1077 && (CONSTRUCTOR_NELTS (vect) == 0
1078 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1079 != VECTOR_TYPE))
1080 {
1081 if (index < CONSTRUCTOR_NELTS (vect))
1082 return CONSTRUCTOR_ELT (vect, index)->value;
1083 return build_zero_cst (vect_elt_type);
1084 }
1085 else
1086 {
1087 tree size = TYPE_SIZE (vect_elt_type);
1088 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1089 size);
1090 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1091 }
1092 }
1093
1094 if (!ptmpvec)
1095 tmpvec = create_tmp_var (vect_type, "vectmp");
1096 else if (!*ptmpvec)
1097 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1098 else
1099 {
1100 tmpvec = *ptmpvec;
1101 need_asgn = false;
1102 }
1103
1104 if (need_asgn)
1105 {
1106 TREE_ADDRESSABLE (tmpvec) = 1;
1107 asgn = gimple_build_assign (tmpvec, vect);
1108 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1109 }
1110
1111 arraytype = build_array_type_nelts (vect_elt_type, elements);
1112 return build4 (ARRAY_REF, vect_elt_type,
1113 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1114 idx, NULL_TREE, NULL_TREE);
1115 }
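/* For example, extracting lane 2 of a V4SI value with the helper above
   yields BIT_FIELD_REF <vect, 32, 64> (or the constant element directly
   if VECT is a VECTOR_CST), whereas a variable index forces VECT into an
   addressable temporary and reads it back as an ARRAY_REF element.  */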
1116
1117 /* Check whether the VEC_PERM_EXPR in the given setting is supported
1118    by the hardware, or lower it piecewise.
1119
1120    When VEC_PERM_EXPR has the same first and second operands,
1121    VEC_PERM_EXPR <v0, v0, mask>, the lowered version is
1122    {v0[mask[0]], v0[mask[1]], ...}.
1123    MASK and V0 must have the same number of elements.
1124
1125    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1126    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}.
1127    V0 and V1 must have the same type.  MASK, V0 and V1 must have the
1128    same number of elements.  */
1129
1130 static void
1131 lower_vec_perm (gimple_stmt_iterator *gsi)
1132 {
1133 gimple stmt = gsi_stmt (*gsi);
1134 tree mask = gimple_assign_rhs3 (stmt);
1135 tree vec0 = gimple_assign_rhs1 (stmt);
1136 tree vec1 = gimple_assign_rhs2 (stmt);
1137 tree vect_type = TREE_TYPE (vec0);
1138 tree mask_type = TREE_TYPE (mask);
1139 tree vect_elt_type = TREE_TYPE (vect_type);
1140 tree mask_elt_type = TREE_TYPE (mask_type);
1141 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1142 vec<constructor_elt, va_gc> *v;
1143 tree constr, t, si, i_val;
1144 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1145 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1146 location_t loc = gimple_location (gsi_stmt (*gsi));
1147 unsigned i;
1148
1149 if (TREE_CODE (mask) == SSA_NAME)
1150 {
1151 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1152 if (is_gimple_assign (def_stmt)
1153 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1154 mask = gimple_assign_rhs1 (def_stmt);
1155 }
1156
1157 if (TREE_CODE (mask) == VECTOR_CST)
1158 {
1159 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1160
1161 for (i = 0; i < elements; ++i)
1162 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1163 & (2 * elements - 1));
1164
1165 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1166 {
1167 gimple_assign_set_rhs3 (stmt, mask);
1168 update_stmt (stmt);
1169 return;
1170 }
1171 }
1172 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1173 return;
1174
1175 warning_at (loc, OPT_Wvector_operation_performance,
1176 "vector shuffling operation will be expanded piecewise");
1177
1178 vec_alloc (v, elements);
1179 for (i = 0; i < elements; i++)
1180 {
1181 si = size_int (i);
1182 i_val = vector_element (gsi, mask, si, &masktmp);
1183
1184 if (TREE_CODE (i_val) == INTEGER_CST)
1185 {
1186 unsigned HOST_WIDE_INT index;
1187
1188 index = TREE_INT_CST_LOW (i_val);
1189 if (!host_integerp (i_val, 1) || index >= elements)
1190 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1191
1192 if (two_operand_p && (index & elements) != 0)
1193 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1194 else
1195 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1196
1197 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1198 true, GSI_SAME_STMT);
1199 }
1200 else
1201 {
1202 tree cond = NULL_TREE, v0_val;
1203
1204 if (two_operand_p)
1205 {
1206 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1207 build_int_cst (mask_elt_type, elements));
1208 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1209 true, GSI_SAME_STMT);
1210 }
1211
1212 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1213 build_int_cst (mask_elt_type, elements - 1));
1214 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1215 true, GSI_SAME_STMT);
1216
1217 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1218 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1219 true, GSI_SAME_STMT);
1220
1221 if (two_operand_p)
1222 {
1223 tree v1_val;
1224
1225 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1226 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1227 true, GSI_SAME_STMT);
1228
1229 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1230 cond, build_zero_cst (mask_elt_type));
1231 cond = fold_build3 (COND_EXPR, vect_elt_type,
1232 cond, v0_val, v1_val);
1233 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1234 true, GSI_SAME_STMT);
1235 }
1236 else
1237 t = v0_val;
1238 }
1239
1240 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1241 }
1242
1243 constr = build_constructor (vect_type, v);
1244 gimple_assign_set_rhs_from_tree (gsi, constr);
1245 update_stmt (gsi_stmt (*gsi));
1246 }
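/* For example, when the target has no matching permute instruction,
   VEC_PERM_EXPR <v0, v1, {0, 5, 2, 7}> on V4SI is lowered by the function
   above to the constructor {v0[0], v1[1], v0[2], v1[3]}, since mask
   elements greater than or equal to 4 select from the second vector.  */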
1247
1248 /* Process one statement. If we identify a vector operation, expand it. */
1249
1250 static void
1251 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1252 {
1253 gimple stmt = gsi_stmt (*gsi);
1254 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
1255 enum tree_code code;
1256 enum machine_mode compute_mode;
1257 optab op = unknown_optab;
1258 enum gimple_rhs_class rhs_class;
1259 tree new_rhs;
1260
1261 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1262 return;
1263
1264 code = gimple_assign_rhs_code (stmt);
1265 rhs_class = get_gimple_rhs_class (code);
1266 lhs = gimple_assign_lhs (stmt);
1267
1268 if (code == VEC_PERM_EXPR)
1269 {
1270 lower_vec_perm (gsi);
1271 return;
1272 }
1273
1274 if (code == VEC_COND_EXPR)
1275 {
1276 expand_vector_condition (gsi);
1277 return;
1278 }
1279 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1280 return;
1281
1282 rhs1 = gimple_assign_rhs1 (stmt);
1283 type = gimple_expr_type (stmt);
1284 if (rhs_class == GIMPLE_BINARY_RHS)
1285 rhs2 = gimple_assign_rhs2 (stmt);
1286
1287 if (TREE_CODE (type) != VECTOR_TYPE)
1288 return;
1289
1290 if (code == NOP_EXPR
1291 || code == FLOAT_EXPR
1292 || code == FIX_TRUNC_EXPR
1293 || code == VIEW_CONVERT_EXPR)
1294 return;
1295
1296 gcc_assert (code != CONVERT_EXPR);
1297
1298   /* The signedness is determined from the input argument.  */
1299 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1300 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1301 type = TREE_TYPE (rhs1);
1302
1303   /* For widening/narrowing vector operations, the relevant type is that of
1304      the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
1305      handled in the same way just above.  */
1306 if (code == WIDEN_SUM_EXPR
1307 || code == VEC_WIDEN_MULT_HI_EXPR
1308 || code == VEC_WIDEN_MULT_LO_EXPR
1309 || code == VEC_WIDEN_MULT_EVEN_EXPR
1310 || code == VEC_WIDEN_MULT_ODD_EXPR
1311 || code == VEC_UNPACK_HI_EXPR
1312 || code == VEC_UNPACK_LO_EXPR
1313 || code == VEC_PACK_TRUNC_EXPR
1314 || code == VEC_PACK_SAT_EXPR
1315 || code == VEC_PACK_FIX_TRUNC_EXPR
1316 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1317 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1318 type = TREE_TYPE (rhs1);
1319
1320   /* Choose between a vector shift/rotate by vector and a vector
1321      shift/rotate by scalar.  */
1322 if (code == LSHIFT_EXPR
1323 || code == RSHIFT_EXPR
1324 || code == LROTATE_EXPR
1325 || code == RROTATE_EXPR)
1326 {
1327 optab opv;
1328
1329 /* Check whether we have vector <op> {x,x,x,x} where x
1330 could be a scalar variable or a constant. Transform
1331 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1332 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1333 {
1334 tree first;
1335 gimple def_stmt;
1336
1337 if ((TREE_CODE (rhs2) == VECTOR_CST
1338 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1339 || (TREE_CODE (rhs2) == SSA_NAME
1340 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1341 && gimple_assign_single_p (def_stmt)
1342 && (first = uniform_vector_p
1343 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1344 {
1345 gimple_assign_set_rhs2 (stmt, first);
1346 update_stmt (stmt);
1347 rhs2 = first;
1348 }
1349 }
1350
1351 opv = optab_for_tree_code (code, type, optab_vector);
1352 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1353 op = opv;
1354 else
1355 {
1356 op = optab_for_tree_code (code, type, optab_scalar);
1357
1358 /* The rtl expander will expand vector/scalar as vector/vector
1359 if necessary. Don't bother converting the stmt here. */
1360 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1361 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1362 return;
1363 }
1364 }
1365 else
1366 op = optab_for_tree_code (code, type, optab_default);
1367
1368 /* Optabs will try converting a negation into a subtraction, so
1369 look for it as well. TODO: negation of floating-point vectors
1370 might be turned into an exclusive OR toggling the sign bit. */
1371 if (op == unknown_optab
1372 && code == NEGATE_EXPR
1373 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1374 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1375
1376 /* For very wide vectors, try using a smaller vector mode. */
1377 compute_type = type;
1378 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
1379 {
1380 tree vector_compute_type
1381 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1382 if (vector_compute_type != NULL_TREE
1383 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1384 < TYPE_VECTOR_SUBPARTS (compute_type))
1385 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1386 != CODE_FOR_nothing))
1387 compute_type = vector_compute_type;
1388 }
1389
1390 /* If we are breaking a BLKmode vector into smaller pieces,
1391 type_for_widest_vector_mode has already looked into the optab,
1392 so skip these checks. */
1393 if (compute_type == type)
1394 {
1395 compute_mode = TYPE_MODE (compute_type);
1396 if (VECTOR_MODE_P (compute_mode))
1397 {
1398 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1399 return;
1400 if (code == MULT_HIGHPART_EXPR
1401 && can_mult_highpart_p (compute_mode,
1402 TYPE_UNSIGNED (compute_type)))
1403 return;
1404 }
1405 /* There is no operation in hardware, so fall back to scalars. */
1406 compute_type = TREE_TYPE (type);
1407 }
1408
1409 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1410 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1411
1412 /* Leave expression untouched for later expansion. */
1413 if (new_rhs == NULL_TREE)
1414 return;
1415
1416 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1417 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1418 new_rhs);
1419
1420 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1421 way to do it is change expand_vector_operation and its callees to
1422 return a tree_code, RHS1 and RHS2 instead of a tree. */
1423 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1424 update_stmt (gsi_stmt (*gsi));
1425 }
1426 \f
1427 /* Use this to lower vector operations introduced by the vectorizer,
1428 if it may need the bit-twiddling tricks implemented in this file. */
1429
1430 static bool
1431 gate_expand_vector_operations_ssa (void)
1432 {
1433 return !(cfun->curr_properties & PROP_gimple_lvec);
1434 }
1435
1436 static unsigned int
1437 expand_vector_operations (void)
1438 {
1439 gimple_stmt_iterator gsi;
1440 basic_block bb;
1441 bool cfg_changed = false;
1442
1443 FOR_EACH_BB (bb)
1444 {
1445 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1446 {
1447 expand_vector_operations_1 (&gsi);
1448 /* ??? If we do not cleanup EH then we will ICE in
1449 verification. But in reality we have created wrong-code
1450 as we did not properly transition EH info and edges to
1451 the piecewise computations. */
1452 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1453 && gimple_purge_dead_eh_edges (bb))
1454 cfg_changed = true;
1455 }
1456 }
1457
1458 return cfg_changed ? TODO_cleanup_cfg : 0;
1459 }
1460
1461 namespace {
1462
1463 const pass_data pass_data_lower_vector =
1464 {
1465 GIMPLE_PASS, /* type */
1466 "veclower", /* name */
1467 OPTGROUP_VEC, /* optinfo_flags */
1468 true, /* has_gate */
1469 true, /* has_execute */
1470 TV_NONE, /* tv_id */
1471 PROP_cfg, /* properties_required */
1472 PROP_gimple_lvec, /* properties_provided */
1473 0, /* properties_destroyed */
1474 0, /* todo_flags_start */
1475 ( TODO_update_ssa | TODO_verify_ssa
1476 | TODO_verify_stmts
1477 | TODO_verify_flow
1478 | TODO_cleanup_cfg ), /* todo_flags_finish */
1479 };
1480
1481 class pass_lower_vector : public gimple_opt_pass
1482 {
1483 public:
1484 pass_lower_vector (gcc::context *ctxt)
1485 : gimple_opt_pass (pass_data_lower_vector, ctxt)
1486 {}
1487
1488 /* opt_pass methods: */
1489 bool gate () { return gate_expand_vector_operations_ssa (); }
1490 unsigned int execute () { return expand_vector_operations (); }
1491
1492 }; // class pass_lower_vector
1493
1494 } // anon namespace
1495
1496 gimple_opt_pass *
1497 make_pass_lower_vector (gcc::context *ctxt)
1498 {
1499 return new pass_lower_vector (ctxt);
1500 }
1501
1502 namespace {
1503
1504 const pass_data pass_data_lower_vector_ssa =
1505 {
1506 GIMPLE_PASS, /* type */
1507 "veclower2", /* name */
1508 OPTGROUP_VEC, /* optinfo_flags */
1509 false, /* has_gate */
1510 true, /* has_execute */
1511 TV_NONE, /* tv_id */
1512 PROP_cfg, /* properties_required */
1513 PROP_gimple_lvec, /* properties_provided */
1514 0, /* properties_destroyed */
1515 0, /* todo_flags_start */
1516 ( TODO_update_ssa | TODO_verify_ssa
1517 | TODO_verify_stmts
1518 | TODO_verify_flow
1519 | TODO_cleanup_cfg ), /* todo_flags_finish */
1520 };
1521
1522 class pass_lower_vector_ssa : public gimple_opt_pass
1523 {
1524 public:
1525 pass_lower_vector_ssa (gcc::context *ctxt)
1526 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1527 {}
1528
1529 /* opt_pass methods: */
1530 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1531 unsigned int execute () { return expand_vector_operations (); }
1532
1533 }; // class pass_lower_vector_ssa
1534
1535 } // anon namespace
1536
1537 gimple_opt_pass *
1538 make_pass_lower_vector_ssa (gcc::context *ctxt)
1539 {
1540 return new pass_lower_vector_ssa (ctxt);
1541 }
1542
1543 #include "gt-tree-vect-generic.h"