Use builtin_widen_mult_even/odd in tree-vect-generic division expansion
[gcc.git] / gcc / tree-vect-generic.c
1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree.h"
25 #include "tm.h"
26 #include "langhooks.h"
27 #include "tree-flow.h"
28 #include "gimple.h"
29 #include "tree-iterator.h"
30 #include "tree-pass.h"
31 #include "flags.h"
32 #include "ggc.h"
33 #include "diagnostic.h"
34 #include "target.h"
35
36 /* Need to include rtl.h, expr.h, etc. for optabs. */
37 #include "expr.h"
38 #include "optabs.h"
39
40
41 static void expand_vector_operations_1 (gimple_stmt_iterator *);
42
43
44 /* Build a constant of type TYPE, made of VALUE's bits replicated
45 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
46 static tree
47 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
48 {
49 int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
50 int n = HOST_BITS_PER_WIDE_INT / width;
51 unsigned HOST_WIDE_INT low, high, mask;
52 tree ret;
53
54 gcc_assert (n);
55
56 if (width == HOST_BITS_PER_WIDE_INT)
57 low = value;
58 else
59 {
60 mask = ((HOST_WIDE_INT)1 << width) - 1;
61 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
62 }
63
64 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
65 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
66 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
67 high = 0;
68 else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
69 high = low;
70 else
71 gcc_unreachable ();
72
73 ret = build_int_cst_wide (type, low, high);
74 return ret;
75 }
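/* Purely illustrative helper (not used by this pass): the same
   replication trick specialized to 8-bit elements in a 32-bit word,
   assuming a 32-bit unsigned int.  ~0U / 0xff is 0x01010101, so
   multiplying it by the low byte copies that byte into every byte
   position.  */
static unsigned int
example_replicate_byte (unsigned int value)
{
  unsigned int mask = 0xff;              /* (1 << 8) - 1 */
  return (~0U / mask) * (value & mask);  /* e.g. 0x7f -> 0x7f7f7f7f */
}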
76
77 static GTY(()) tree vector_inner_type;
78 static GTY(()) tree vector_last_type;
79 static GTY(()) int vector_last_nunits;
80
81 /* Return a suitable vector type made of SUBPARTS units, each of mode
82 "word_mode" (the global variable). */
83 static tree
84 build_word_mode_vector_type (int nunits)
85 {
86 if (!vector_inner_type)
87 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
88 else if (vector_last_nunits == nunits)
89 {
90 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
91 return vector_last_type;
92 }
93
94 /* We build a new type, but we canonicalize it nevertheless,
95 because it still saves some memory. */
96 vector_last_nunits = nunits;
97 vector_last_type = type_hash_canon (nunits,
98 build_vector_type (vector_inner_type,
99 nunits));
100 return vector_last_type;
101 }
102
103 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
104 tree, tree, tree, tree, tree, enum tree_code);
105
106 static inline tree
107 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
108 tree t, tree bitsize, tree bitpos)
109 {
110 if (bitpos)
111 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
112 else
113 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
114 }
115
116 static tree
117 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
118 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
119 enum tree_code code)
120 {
121 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
122 return gimplify_build1 (gsi, code, inner_type, a);
123 }
124
125 static tree
126 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
127 tree bitpos, tree bitsize, enum tree_code code)
128 {
129 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
130 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
131 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
132 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
133 return gimplify_build2 (gsi, code, inner_type, a, b);
134 }
135
136 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
137
138 INNER_TYPE is the type of the elements of A and B.
139
140 The returned expression is of signed integer type with the
141 same size as INNER_TYPE. */
142 static tree
143 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
144 tree bitpos, tree bitsize, enum tree_code code)
145 {
146 tree comp_type;
147
148 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
149 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
150
151 comp_type = build_nonstandard_integer_type
152 (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);
153
154 return gimplify_build3 (gsi, COND_EXPR, comp_type,
155 fold_build2 (code, boolean_type_node, a, b),
156 build_int_cst (comp_type, -1),
157 build_int_cst (comp_type, 0));
158 }
159
160 /* Expand vector addition to scalars. This does bit twiddling
161 in order to increase parallelism:
162
163 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
164 (a ^ b) & 0x80808080
165
166 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
167 (a ^ ~b) & 0x80808080
168
169 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
170
171 This optimization should be done only if 4 vector items or more
172 fit into a word. */
173 static tree
174 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
175 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
176 enum tree_code code)
177 {
178 tree inner_type = TREE_TYPE (TREE_TYPE (a));
179 unsigned HOST_WIDE_INT max;
180 tree low_bits, high_bits, a_low, b_low, result_low, signs;
181
182 max = GET_MODE_MASK (TYPE_MODE (inner_type));
183 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
184 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
185
186 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
187 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
188
189 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
190 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
191 if (code == PLUS_EXPR)
192 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
193 else
194 {
195 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
196 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
197 }
198
199 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
200 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
201 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
202 }
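/* Purely illustrative helper (not used by this pass): the word-parallel
   addition above, specialized to four 8-bit lanes packed into a 32-bit
   unsigned int.  Masking confines the carries to the low seven bits of
   each lane, and the lane sign bits are patched back in with an XOR,
   exactly as in the "a + b" identity quoted above.  */
static unsigned int
example_packed_add_4x8 (unsigned int a, unsigned int b)
{
  unsigned int low_bits  = 0x7f7f7f7f;
  unsigned int high_bits = 0x80808080;
  unsigned int result_low = (a & low_bits) + (b & low_bits);
  return result_low ^ ((a ^ b) & high_bits);
}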
203
204 static tree
205 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
206 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
207 tree bitsize ATTRIBUTE_UNUSED,
208 enum tree_code code ATTRIBUTE_UNUSED)
209 {
210 tree inner_type = TREE_TYPE (TREE_TYPE (b));
211 HOST_WIDE_INT max;
212 tree low_bits, high_bits, b_low, result_low, signs;
213
214 max = GET_MODE_MASK (TYPE_MODE (inner_type));
215 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
216 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
217
218 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
219
220 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
221 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
222 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
223 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
224 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
225 }
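/* Purely illustrative helper (not used by this pass): the matching
   negation identity from the comment above,
   -b = (0x80808080 - (b & 0x7f7f7f7f)) ^ (~b & 0x80808080),
   again for four 8-bit lanes in a 32-bit unsigned int.  */
static unsigned int
example_packed_neg_4x8 (unsigned int b)
{
  unsigned int low_bits  = 0x7f7f7f7f;
  unsigned int high_bits = 0x80808080;
  return (high_bits - (b & low_bits)) ^ (~b & high_bits);
}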
226
227 /* Expand a vector operation to scalars, by using many operations
228 whose type is the vector type's inner type. */
229 static tree
230 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
231 tree type, tree inner_type,
232 tree a, tree b, enum tree_code code)
233 {
234 VEC(constructor_elt,gc) *v;
235 tree part_width = TYPE_SIZE (inner_type);
236 tree index = bitsize_int (0);
237 int nunits = TYPE_VECTOR_SUBPARTS (type);
238 int delta = tree_low_cst (part_width, 1)
239 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
240 int i;
241 location_t loc = gimple_location (gsi_stmt (*gsi));
242
243 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
244 warning_at (loc, OPT_Wvector_operation_performance,
245 "vector operation will be expanded piecewise");
246 else
247 warning_at (loc, OPT_Wvector_operation_performance,
248 "vector operation will be expanded in parallel");
249
250 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
251 for (i = 0; i < nunits;
252 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
253 {
254 tree result = f (gsi, inner_type, a, b, index, part_width, code);
255 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
256 ce->index = NULL_TREE;
257 ce->value = result;
258 }
259
260 return build_constructor (type, v);
261 }
262
263 /* Expand a vector operation to scalars with the freedom to use
264 a scalar integer type, or to use a different size for the items
265 in the vector type. */
266 static tree
267 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
268 tree a, tree b,
269 enum tree_code code)
270 {
271 tree result, compute_type;
272 enum machine_mode mode;
273 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
274 location_t loc = gimple_location (gsi_stmt (*gsi));
275
276 /* We have three strategies. If the inner type is already word-sized, just
277 do the operation an element at a time. Else, if the vector is wider than
278 one word, do it a word at a time; finally, if the vector is smaller
279 than one word, do it as a single scalar operation. */
280 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
281 return expand_vector_piecewise (gsi, f,
282 type, TREE_TYPE (type),
283 a, b, code);
284 else if (n_words > 1)
285 {
286 tree word_type = build_word_mode_vector_type (n_words);
287 result = expand_vector_piecewise (gsi, f,
288 word_type, TREE_TYPE (word_type),
289 a, b, code);
290 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
291 GSI_SAME_STMT);
292 }
293 else
294 {
295 /* Use a single scalar operation with a mode no wider than word_mode. */
296 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
297 compute_type = lang_hooks.types.type_for_mode (mode, 1);
298 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
299 warning_at (loc, OPT_Wvector_operation_performance,
300 "vector operation will be expanded with a "
301 "single scalar operation");
302 }
303
304 return result;
305 }
306
307 /* Expand a vector operation to scalars; for integer types we can use
308 special bit twiddling tricks to do the sums a word at a time, using
309 function F_PARALLEL instead of F. These tricks are done only if
310 they can process at least four items, that is, only if the vector
311 holds at least four items and if a word can hold four items. */
312 static tree
313 expand_vector_addition (gimple_stmt_iterator *gsi,
314 elem_op_func f, elem_op_func f_parallel,
315 tree type, tree a, tree b, enum tree_code code)
316 {
317 int parts_per_word = UNITS_PER_WORD
318 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
319
320 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
321 && parts_per_word >= 4
322 && TYPE_VECTOR_SUBPARTS (type) >= 4)
323 return expand_vector_parallel (gsi, f_parallel,
324 type, a, b, code);
325 else
326 return expand_vector_piecewise (gsi, f,
327 type, TREE_TYPE (type),
328 a, b, code);
329 }
330
331 /* Check whether vector VEC consists of identical elements and
332 whether the number of elements matches the type of VEC.
333 The function returns the first element of the vector if it is
334 uniform, or NULL_TREE otherwise. */
335 static tree
336 uniform_vector_p (tree vec)
337 {
338 tree first, t;
339 unsigned i;
340
341 if (vec == NULL_TREE)
342 return NULL_TREE;
343
344 if (TREE_CODE (vec) == VECTOR_CST)
345 {
346 first = VECTOR_CST_ELT (vec, 0);
347 for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
348 if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
349 return NULL_TREE;
350
351 return first;
352 }
353
354 else if (TREE_CODE (vec) == CONSTRUCTOR)
355 {
356 first = error_mark_node;
357
358 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
359 {
360 if (i == 0)
361 {
362 first = t;
363 continue;
364 }
365 if (!operand_equal_p (first, t, 0))
366 return NULL_TREE;
367 }
368 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
369 return NULL_TREE;
370
371 return first;
372 }
373
374 return NULL_TREE;
375 }
376
377 /* Try to expand the vector comparison OP0 CODE OP1 by querying the
378 optab for whether the expression
379 VEC_COND_EXPR <OP0 CODE OP1, {-1,...}, {0,...}>
380 can be expanded natively; if not, expand the comparison piecewise. */
381 static tree
382 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
383 tree op1, enum tree_code code)
384 {
385 tree t;
386 if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
387 t = expand_vector_piecewise (gsi, do_compare, type,
388 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
389 else
390 t = NULL_TREE;
391
392 return t;
393 }
394
395 /* Helper function of expand_vector_divmod. Gimplify an RSHIFT_EXPR of OP0
396 in TYPE, with the shift counts taken from the SHIFTCNTS array, and return
397 the temporary holding the result if successful, otherwise return NULL_TREE. */
398 static tree
399 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
400 {
401 optab op;
402 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
403 bool scalar_shift = true;
404
405 for (i = 1; i < nunits; i++)
406 {
407 if (shiftcnts[i] != shiftcnts[0])
408 scalar_shift = false;
409 }
410
411 if (scalar_shift && shiftcnts[0] == 0)
412 return op0;
413
414 if (scalar_shift)
415 {
416 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
417 if (op != NULL
418 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
419 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
420 build_int_cst (NULL_TREE, shiftcnts[0]));
421 }
422
423 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
424 if (op != NULL
425 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
426 {
427 tree *vec = XALLOCAVEC (tree, nunits);
428 for (i = 0; i < nunits; i++)
429 vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
430 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
431 build_vector (type, vec));
432 }
433
434 return NULL_TREE;
435 }
436
437 /* Try to expand integer vector division by a constant using
438 widening multiplication, shifts and additions. */
439 static tree
440 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
441 tree op1, enum tree_code code)
442 {
443 bool use_pow2 = true;
444 bool has_vector_shift = true;
445 int mode = -1, this_mode;
446 int pre_shift = -1, post_shift;
447 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
448 int *shifts = XALLOCAVEC (int, nunits * 4);
449 int *pre_shifts = shifts + nunits;
450 int *post_shifts = pre_shifts + nunits;
451 int *shift_temps = post_shifts + nunits;
452 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
453 int prec = TYPE_PRECISION (TREE_TYPE (type));
454 int dummy_int;
455 unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
456 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
457 optab op;
458 tree *vec;
459 unsigned char *sel = NULL;
460 tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem, decl_e, decl_o;
461
462 if (prec > HOST_BITS_PER_WIDE_INT)
463 return NULL_TREE;
464
465 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
466 if (op == NULL
467 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
468 has_vector_shift = false;
469
470 /* Analysis phase. Determine whether all op1 elements are powers
471 of two, in which case the division can be expanded using shifts (or,
472 for remainder, using masking). Additionally compute the multiplicative
473 constants and pre- and post-shifts if the division is to be expanded
474 using widening or high-part multiplication plus shifts. */
475 for (i = 0; i < nunits; i++)
476 {
477 tree cst = VECTOR_CST_ELT (op1, i);
478 unsigned HOST_WIDE_INT ml;
479
480 if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
481 return NULL_TREE;
482 pre_shifts[i] = 0;
483 post_shifts[i] = 0;
484 mulc[i] = 0;
485 if (use_pow2
486 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
487 use_pow2 = false;
488 if (use_pow2)
489 {
490 shifts[i] = tree_log2 (cst);
491 if (shifts[i] != shifts[0]
492 && code == TRUNC_DIV_EXPR
493 && !has_vector_shift)
494 use_pow2 = false;
495 }
496 if (mode == -2)
497 continue;
498 if (unsignedp)
499 {
500 unsigned HOST_WIDE_INT mh;
501 unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;
502
503 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
504 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
505 return NULL_TREE;
506
507 if (d <= 1)
508 {
509 mode = -2;
510 continue;
511 }
512
513 /* Find a suitable multiplier and right shift count so that the
514 division by D can be done as a high-part multiply and a shift. */
515 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
516
517 /* If the suggested multiplier is more than PREC bits, we can do
518 better for even divisors, using an initial right shift. */
519 if ((mh != 0 && (d & 1) == 0)
520 || (!has_vector_shift && pre_shift != -1))
521 {
522 if (has_vector_shift)
523 pre_shift = floor_log2 (d & -d);
524 else if (pre_shift == -1)
525 {
526 unsigned int j;
527 for (j = 0; j < nunits; j++)
528 {
529 tree cst2 = VECTOR_CST_ELT (op1, j);
530 unsigned HOST_WIDE_INT d2;
531 int this_pre_shift;
532
533 if (!host_integerp (cst2, 1))
534 return NULL_TREE;
535 d2 = tree_low_cst (cst2, 1) & mask;
536 if (d2 == 0)
537 return NULL_TREE;
538 this_pre_shift = floor_log2 (d2 & -d2);
539 if (pre_shift == -1 || this_pre_shift < pre_shift)
540 pre_shift = this_pre_shift;
541 }
542 if (i != 0 && pre_shift != 0)
543 {
544 /* Restart. */
545 i = -1U;
546 mode = -1;
547 continue;
548 }
549 }
550 if (pre_shift != 0)
551 {
552 if ((d >> pre_shift) <= 1)
553 {
554 mode = -2;
555 continue;
556 }
557 mh = choose_multiplier (d >> pre_shift, prec,
558 prec - pre_shift,
559 &ml, &post_shift, &dummy_int);
560 gcc_assert (!mh);
561 pre_shifts[i] = pre_shift;
562 }
563 }
564 if (!mh)
565 this_mode = 0;
566 else
567 this_mode = 1;
568 }
569 else
570 {
571 HOST_WIDE_INT d = tree_low_cst (cst, 0);
572 unsigned HOST_WIDE_INT abs_d;
573
574 if (d == -1)
575 return NULL_TREE;
576
577 /* Since d might be INT_MIN, we have to cast to
578 unsigned HOST_WIDE_INT before negating to avoid
579 undefined signed overflow. */
580 abs_d = (d >= 0
581 ? (unsigned HOST_WIDE_INT) d
582 : - (unsigned HOST_WIDE_INT) d);
583
584 /* n rem d = n rem -d */
585 if (code == TRUNC_MOD_EXPR && d < 0)
586 d = abs_d;
587 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
588 {
589 /* This case is not handled correctly below. */
590 mode = -2;
591 continue;
592 }
593 if (abs_d <= 1)
594 {
595 mode = -2;
596 continue;
597 }
598
599 choose_multiplier (abs_d, prec, prec - 1, &ml,
600 &post_shift, &dummy_int);
601 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
602 {
603 this_mode = 4 + (d < 0);
604 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
605 }
606 else
607 this_mode = 2 + (d < 0);
608 }
609 mulc[i] = ml;
610 post_shifts[i] = post_shift;
611 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
612 || post_shift >= prec
613 || pre_shifts[i] >= prec)
614 this_mode = -2;
615
616 if (i == 0)
617 mode = this_mode;
618 else if (mode != this_mode)
619 mode = -2;
620 }
621
622 vec = XALLOCAVEC (tree, nunits);
623
624 if (use_pow2)
625 {
626 tree addend = NULL_TREE;
627 if (!unsignedp)
628 {
629 tree uns_type;
630
631 /* Both division and remainder sequences need
632 op0 < 0 ? mask : 0 computed. It can be computed either as
633 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
634 if none of the shifts is 0, or with a VEC_COND_EXPR. */
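/* For instance, for a V4SI division by 4 (prec == 32, shifts[i] == 2)
   the addend is (type) (((uns_type) (op0 >> 31)) >> 30), i.e. 3 in
   the negative lanes and 0 elsewhere, so that (op0 + addend) >> 2
   rounds the quotient toward zero.  */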
635 for (i = 0; i < nunits; i++)
636 if (shifts[i] == 0)
637 break;
638 uns_type
639 = build_vector_type (build_nonstandard_integer_type (prec, 1),
640 nunits);
641 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
642 {
643 for (i = 0; i < nunits; i++)
644 shift_temps[i] = prec - 1;
645 cur_op = add_rshift (gsi, type, op0, shift_temps);
646 if (cur_op != NULL_TREE)
647 {
648 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
649 uns_type, cur_op);
650 for (i = 0; i < nunits; i++)
651 shift_temps[i] = prec - shifts[i];
652 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
653 if (cur_op != NULL_TREE)
654 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
655 type, cur_op);
656 }
657 }
658 if (addend == NULL_TREE
659 && expand_vec_cond_expr_p (type, type))
660 {
661 tree zero, cst, cond;
662 gimple stmt;
663
664 zero = build_zero_cst (type);
665 cond = build2 (LT_EXPR, type, op0, zero);
666 for (i = 0; i < nunits; i++)
667 vec[i] = build_int_cst (TREE_TYPE (type),
668 ((unsigned HOST_WIDE_INT) 1
669 << shifts[i]) - 1);
670 cst = build_vector (type, vec);
671 addend = create_tmp_reg (type, NULL);
672 add_referenced_var (addend);
673 addend = make_ssa_name (addend, NULL);
674 stmt = gimple_build_assign_with_ops3 (VEC_COND_EXPR, addend,
675 cond, cst, zero);
676 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
677 }
678 }
679 if (code == TRUNC_DIV_EXPR)
680 {
681 if (unsignedp)
682 {
683 /* q = op0 >> shift; */
684 cur_op = add_rshift (gsi, type, op0, shifts);
685 if (cur_op != NULL_TREE)
686 return cur_op;
687 }
688 else if (addend != NULL_TREE)
689 {
690 /* t1 = op0 + addend;
691 q = t1 >> shift; */
692 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
693 if (op != NULL
694 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
695 {
696 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
697 cur_op = add_rshift (gsi, type, cur_op, shifts);
698 if (cur_op != NULL_TREE)
699 return cur_op;
700 }
701 }
702 }
703 else
704 {
705 tree mask;
706 for (i = 0; i < nunits; i++)
707 vec[i] = build_int_cst (TREE_TYPE (type),
708 ((unsigned HOST_WIDE_INT) 1
709 << shifts[i]) - 1);
710 mask = build_vector (type, vec);
711 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
712 if (op != NULL
713 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
714 {
715 if (unsignedp)
716 /* r = op0 & mask; */
717 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
718 else if (addend != NULL_TREE)
719 {
720 /* t1 = op0 + addend;
721 t2 = t1 & mask;
722 r = t2 - addend; */
723 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
724 if (op != NULL
725 && optab_handler (op, TYPE_MODE (type))
726 != CODE_FOR_nothing)
727 {
728 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
729 addend);
730 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
731 cur_op, mask);
732 op = optab_for_tree_code (MINUS_EXPR, type,
733 optab_default);
734 if (op != NULL
735 && optab_handler (op, TYPE_MODE (type))
736 != CODE_FOR_nothing)
737 return gimplify_build2 (gsi, MINUS_EXPR, type,
738 cur_op, addend);
739 }
740 }
741 }
742 }
743 }
744
745 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
746 return NULL_TREE;
747
748 op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
749 if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
750 wider_type = decl_e = decl_o = NULL_TREE;
751 else
752 {
753 wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
754 wider_type = build_vector_type (wider_type, nunits / 2);
755 if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
756 || GET_MODE_BITSIZE (TYPE_MODE (wider_type))
757 != GET_MODE_BITSIZE (TYPE_MODE (type)))
758 return NULL_TREE;
759
760 sel = XALLOCAVEC (unsigned char, nunits);
761
762 if (targetm.vectorize.builtin_mul_widen_even
763 && targetm.vectorize.builtin_mul_widen_odd
764 && (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
765 && (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
766 && (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
767 == TYPE_MODE (wider_type)))
768 {
769 for (i = 0; i < nunits; i++)
770 sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
771 if (!can_vec_perm_p (TYPE_MODE (wider_type), false, sel))
772 decl_e = decl_o = NULL_TREE;
773 }
774 else
775 decl_e = decl_o = NULL_TREE;
776
777 if (decl_e == NULL_TREE)
778 {
779 op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
780 type, optab_default);
781 if (op == NULL
782 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
783 return NULL_TREE;
784 op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
785 type, optab_default);
786 if (op == NULL
787 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
788 return NULL_TREE;
789
790 for (i = 0; i < nunits; i++)
791 sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
792 if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
793 return NULL_TREE;
794 }
795 }
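/* From here on, the high part of each product is obtained either
   directly through MULT_HIGHPART_EXPR, or by widening multiplies
   (the target's builtin_mul_widen_even/odd pair, or
   VEC_WIDEN_MULT_HI/LO_EXPR) into WIDER_TYPE followed by a
   VEC_PERM_EXPR whose selector SEL picks the high half of each
   widened product.  */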
796
797 cur_op = op0;
798
799 switch (mode)
800 {
801 case 0:
802 gcc_assert (unsignedp);
803 /* t1 = oprnd0 >> pre_shift;
804 t2 = t1 h* ml;
805 q = t2 >> post_shift; */
806 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
807 if (cur_op == NULL_TREE)
808 return NULL_TREE;
809 break;
810 case 1:
811 gcc_assert (unsignedp);
812 for (i = 0; i < nunits; i++)
813 {
814 shift_temps[i] = 1;
815 post_shifts[i]--;
816 }
817 break;
818 case 2:
819 case 3:
820 case 4:
821 case 5:
822 gcc_assert (!unsignedp);
823 for (i = 0; i < nunits; i++)
824 shift_temps[i] = prec - 1;
825 break;
826 default:
827 return NULL_TREE;
828 }
829
830 for (i = 0; i < nunits; i++)
831 vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
832 mulcst = build_vector (type, vec);
833 if (wider_type == NULL_TREE)
834 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
835 else
836 {
837 for (i = 0; i < nunits; i++)
838 vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
839 perm_mask = build_vector (type, vec);
840
841 if (decl_e != NULL_TREE)
842 {
843 gimple call;
844
845 call = gimple_build_call (decl_e, 2, cur_op, mulcst);
846 mhi = create_tmp_reg (wider_type, NULL);
847 add_referenced_var (mhi);
848 mhi = make_ssa_name (mhi, call);
849 gimple_call_set_lhs (call, mhi);
850 gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
851
852 call = gimple_build_call (decl_o, 2, cur_op, mulcst);
853 mlo = create_tmp_reg (wider_type, NULL);
854 add_referenced_var (mlo);
855 mlo = make_ssa_name (mlo, call);
856 gimple_call_set_lhs (call, mlo);
857 gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
858 }
859 else
860 {
861 mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
862 cur_op, mulcst);
863 mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
864 cur_op, mulcst);
865 }
866
867 mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
868 mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
869 if (BYTES_BIG_ENDIAN)
870 cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo,
871 perm_mask);
872 else
873 cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi,
874 perm_mask);
875 }
876
877 switch (mode)
878 {
879 case 0:
880 /* t1 = oprnd0 >> pre_shift;
881 t2 = t1 h* ml;
882 q = t2 >> post_shift; */
883 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
884 break;
885 case 1:
886 /* t1 = oprnd0 h* ml;
887 t2 = oprnd0 - t1;
888 t3 = t2 >> 1;
889 t4 = t1 + t3;
890 q = t4 >> (post_shift - 1); */
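/* For instance, for an unsigned 32-bit division by 7,
   choose_multiplier gives ml == 0x24924925 and post_shift == 3, so
   each element computes
     t1 = oprnd0 h* 0x24924925;
     q = (((oprnd0 - t1) >> 1) + t1) >> 2;  */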
891 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
892 if (op == NULL
893 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
894 return NULL_TREE;
895 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
896 tem = add_rshift (gsi, type, tem, shift_temps);
897 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
898 if (op == NULL
899 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
900 return NULL_TREE;
901 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
902 cur_op = add_rshift (gsi, type, tem, post_shifts);
903 if (cur_op == NULL_TREE)
904 return NULL_TREE;
905 break;
906 case 2:
907 case 3:
908 case 4:
909 case 5:
910 /* t1 = oprnd0 h* ml;
911 t2 = t1; [ iff (mode & 2) != 0 ]
912 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
913 t3 = t2 >> post_shift;
914 t4 = oprnd0 >> (prec - 1);
915 q = t3 - t4; [ iff (mode & 1) == 0 ]
916 q = t4 - t3; [ iff (mode & 1) != 0 ] */
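/* For instance, for a signed 32-bit division by 5 (mode 2),
   ml == 0x66666667 and post_shift == 1, so each element computes
     t3 = (oprnd0 h* 0x66666667) >> 1;
     q = t3 - (oprnd0 >> 31);  */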
917 if ((mode & 2) == 0)
918 {
919 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
920 if (op == NULL
921 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
922 return NULL_TREE;
923 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
924 }
925 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
926 if (cur_op == NULL_TREE)
927 return NULL_TREE;
928 tem = add_rshift (gsi, type, op0, shift_temps);
929 if (tem == NULL_TREE)
930 return NULL_TREE;
931 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
932 if (op == NULL
933 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
934 return NULL_TREE;
935 if ((mode & 1) == 0)
936 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
937 else
938 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
939 break;
940 default:
941 gcc_unreachable ();
942 }
943
944 if (code == TRUNC_DIV_EXPR)
945 return cur_op;
946
947 /* We have computed the quotient. Now finish computing the remainder:
948 t1 = q * oprnd1;
949 r = oprnd0 - t1; */
950 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
951 if (op == NULL
952 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
953 return NULL_TREE;
954 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
955 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
956 if (op == NULL
957 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
958 return NULL_TREE;
959 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
960 }
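/* Purely illustrative helper (not used by this pass): the scalar
   analogue of the "mode 0" sequence built above, for one unsigned
   32-bit element divided by 3.  choose_multiplier gives
   ml == 0xaaaaaaab with pre_shift == 0 and post_shift == 1; the
   vector code computes the same thing lane-wise, using
   MULT_HIGHPART_EXPR or the widening-multiply sequence for the high
   part.  Assumes 32-bit unsigned int and 64-bit unsigned long long.  */
static unsigned int
example_udiv_by_3 (unsigned int x)
{
  unsigned long long t = (unsigned long long) x * 0xaaaaaaabULL;
  return (unsigned int) (t >> 32) >> 1;  /* high part, then post_shift */
}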
961
962 static tree
963 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
964 gimple assign, enum tree_code code)
965 {
966 enum machine_mode compute_mode = TYPE_MODE (compute_type);
967
968 /* If the compute mode is not a vector mode (hence we are not decomposing
969 a BLKmode vector to smaller, hardware-supported vectors), we may want
970 to expand the operations in parallel. */
971 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
972 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
973 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
974 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
975 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
976 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
977 switch (code)
978 {
979 case PLUS_EXPR:
980 case MINUS_EXPR:
981 if (!TYPE_OVERFLOW_TRAPS (type))
982 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
983 gimple_assign_rhs1 (assign),
984 gimple_assign_rhs2 (assign), code);
985 break;
986
987 case NEGATE_EXPR:
988 if (!TYPE_OVERFLOW_TRAPS (type))
989 return expand_vector_addition (gsi, do_unop, do_negate, type,
990 gimple_assign_rhs1 (assign),
991 NULL_TREE, code);
992 break;
993
994 case BIT_AND_EXPR:
995 case BIT_IOR_EXPR:
996 case BIT_XOR_EXPR:
997 return expand_vector_parallel (gsi, do_binop, type,
998 gimple_assign_rhs1 (assign),
999 gimple_assign_rhs2 (assign), code);
1000
1001 case BIT_NOT_EXPR:
1002 return expand_vector_parallel (gsi, do_unop, type,
1003 gimple_assign_rhs1 (assign),
1004 NULL_TREE, code);
1005 case EQ_EXPR:
1006 case NE_EXPR:
1007 case GT_EXPR:
1008 case LT_EXPR:
1009 case GE_EXPR:
1010 case LE_EXPR:
1011 case UNEQ_EXPR:
1012 case UNGT_EXPR:
1013 case UNLT_EXPR:
1014 case UNGE_EXPR:
1015 case UNLE_EXPR:
1016 case LTGT_EXPR:
1017 case ORDERED_EXPR:
1018 case UNORDERED_EXPR:
1019 {
1020 tree rhs1 = gimple_assign_rhs1 (assign);
1021 tree rhs2 = gimple_assign_rhs2 (assign);
1022
1023 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1024 }
1025
1026 case TRUNC_DIV_EXPR:
1027 case TRUNC_MOD_EXPR:
1028 {
1029 tree rhs1 = gimple_assign_rhs1 (assign);
1030 tree rhs2 = gimple_assign_rhs2 (assign);
1031 tree ret;
1032
1033 if (!optimize
1034 || !VECTOR_INTEGER_TYPE_P (type)
1035 || TREE_CODE (rhs2) != VECTOR_CST)
1036 break;
1037
1038 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1039 if (ret != NULL_TREE)
1040 return ret;
1041 break;
1042 }
1043
1044 default:
1045 break;
1046 }
1047
1048 if (TREE_CODE_CLASS (code) == tcc_unary)
1049 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1050 gimple_assign_rhs1 (assign),
1051 NULL_TREE, code);
1052 else
1053 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1054 gimple_assign_rhs1 (assign),
1055 gimple_assign_rhs2 (assign), code);
1056 }
1057 \f
1058 /* Return a type for the widest vector mode whose components are of type
1059 TYPE, or NULL_TREE if none is found. */
1060
1061 static tree
1062 type_for_widest_vector_mode (tree type, optab op)
1063 {
1064 enum machine_mode inner_mode = TYPE_MODE (type);
1065 enum machine_mode best_mode = VOIDmode, mode;
1066 int best_nunits = 0;
1067
1068 if (SCALAR_FLOAT_MODE_P (inner_mode))
1069 mode = MIN_MODE_VECTOR_FLOAT;
1070 else if (SCALAR_FRACT_MODE_P (inner_mode))
1071 mode = MIN_MODE_VECTOR_FRACT;
1072 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1073 mode = MIN_MODE_VECTOR_UFRACT;
1074 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1075 mode = MIN_MODE_VECTOR_ACCUM;
1076 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1077 mode = MIN_MODE_VECTOR_UACCUM;
1078 else
1079 mode = MIN_MODE_VECTOR_INT;
1080
1081 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
1082 if (GET_MODE_INNER (mode) == inner_mode
1083 && GET_MODE_NUNITS (mode) > best_nunits
1084 && optab_handler (op, mode) != CODE_FOR_nothing)
1085 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1086
1087 if (best_mode == VOIDmode)
1088 return NULL_TREE;
1089 else
1090 return build_vector_type_for_mode (type, best_mode);
1091 }
1092
1093
1094 /* Build a reference to the element of the vector VECT at index IDX.
1095 The function returns either the element itself, a BIT_FIELD_REF, or
1096 an ARRAY_REF expression.
1097
1098 GSI is required to insert temporary variables while building a
1099 reference to the element of the vector VECT.
1100
1101 PTMPVEC is a pointer to a temporary variable used for caching
1102 purposes. If PTMPVEC is NULL, a new temporary variable will be
1103 created. */
1104 static tree
1105 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1106 {
1107 tree vect_type, vect_elt_type;
1108 gimple asgn;
1109 tree tmpvec;
1110 tree arraytype;
1111 bool need_asgn = true;
1112 unsigned int elements;
1113
1114 vect_type = TREE_TYPE (vect);
1115 vect_elt_type = TREE_TYPE (vect_type);
1116 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1117
1118 if (TREE_CODE (idx) == INTEGER_CST)
1119 {
1120 unsigned HOST_WIDE_INT index;
1121
1122 /* Given that we're about to compute a binary modulus,
1123 we don't care about the high bits of the value. */
1124 index = TREE_INT_CST_LOW (idx);
1125 if (!host_integerp (idx, 1) || index >= elements)
1126 {
1127 index &= elements - 1;
1128 idx = build_int_cst (TREE_TYPE (idx), index);
1129 }
1130
1131 /* When lowering a vector statement sequence, do some easy
1132 simplification by looking through intermediate vector results. */
1133 if (TREE_CODE (vect) == SSA_NAME)
1134 {
1135 gimple def_stmt = SSA_NAME_DEF_STMT (vect);
1136 if (is_gimple_assign (def_stmt)
1137 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1138 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1139 vect = gimple_assign_rhs1 (def_stmt);
1140 }
1141
1142 if (TREE_CODE (vect) == VECTOR_CST)
1143 return VECTOR_CST_ELT (vect, index);
1144 else if (TREE_CODE (vect) == CONSTRUCTOR)
1145 {
1146 unsigned i;
1147 tree elt_i, elt_v;
1148
1149 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
1150 if (operand_equal_p (elt_i, idx, 0))
1151 return elt_v;
1152 return build_zero_cst (vect_elt_type);
1153 }
1154 else
1155 {
1156 tree size = TYPE_SIZE (vect_elt_type);
1157 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1158 size);
1159 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1160 }
1161 }
1162
1163 if (!ptmpvec)
1164 tmpvec = create_tmp_var (vect_type, "vectmp");
1165 else if (!*ptmpvec)
1166 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1167 else
1168 {
1169 tmpvec = *ptmpvec;
1170 need_asgn = false;
1171 }
1172
1173 if (need_asgn)
1174 {
1175 TREE_ADDRESSABLE (tmpvec) = 1;
1176 asgn = gimple_build_assign (tmpvec, vect);
1177 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1178 }
1179
1180 arraytype = build_array_type_nelts (vect_elt_type, elements);
1181 return build4 (ARRAY_REF, vect_elt_type,
1182 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1183 idx, NULL_TREE, NULL_TREE);
1184 }
1185
1186 /* Check whether the VEC_PERM_EXPR in the given setting is supported
1187 by the hardware, and lower it piecewise otherwise.
1188
1189 When VEC_PERM_EXPR has the same first and second operands,
1190 VEC_PERM_EXPR <v0, v0, mask>, the lowered version is
1191 {v0[mask[0]], v0[mask[1]], ...}
1192 MASK and V0 must have the same number of elements.
1193
1194 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1195 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0] - len(v0)], ...}
1196 V0 and V1 must have the same type. MASK, V0 and V1 must have the
1197 same number of elements. */
1198
1199 static void
1200 lower_vec_perm (gimple_stmt_iterator *gsi)
1201 {
1202 gimple stmt = gsi_stmt (*gsi);
1203 tree mask = gimple_assign_rhs3 (stmt);
1204 tree vec0 = gimple_assign_rhs1 (stmt);
1205 tree vec1 = gimple_assign_rhs2 (stmt);
1206 tree vect_type = TREE_TYPE (vec0);
1207 tree mask_type = TREE_TYPE (mask);
1208 tree vect_elt_type = TREE_TYPE (vect_type);
1209 tree mask_elt_type = TREE_TYPE (mask_type);
1210 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1211 VEC(constructor_elt,gc) *v;
1212 tree constr, t, si, i_val;
1213 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1214 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1215 location_t loc = gimple_location (gsi_stmt (*gsi));
1216 unsigned i;
1217
1218 if (TREE_CODE (mask) == SSA_NAME)
1219 {
1220 gimple def_stmt = SSA_NAME_DEF_STMT (mask);
1221 if (is_gimple_assign (def_stmt)
1222 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1223 mask = gimple_assign_rhs1 (def_stmt);
1224 }
1225
1226 if (TREE_CODE (mask) == VECTOR_CST)
1227 {
1228 unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
1229
1230 for (i = 0; i < elements; ++i)
1231 sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1232 & (2 * elements - 1));
1233
1234 if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
1235 {
1236 gimple_assign_set_rhs3 (stmt, mask);
1237 update_stmt (stmt);
1238 return;
1239 }
1240 }
1241 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1242 return;
1243
1244 warning_at (loc, OPT_Wvector_operation_performance,
1245 "vector shuffling operation will be expanded piecewise");
1246
1247 v = VEC_alloc (constructor_elt, gc, elements);
1248 for (i = 0; i < elements; i++)
1249 {
1250 si = size_int (i);
1251 i_val = vector_element (gsi, mask, si, &masktmp);
1252
1253 if (TREE_CODE (i_val) == INTEGER_CST)
1254 {
1255 unsigned HOST_WIDE_INT index;
1256
1257 index = TREE_INT_CST_LOW (i_val);
1258 if (!host_integerp (i_val, 1) || index >= elements)
1259 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1260
1261 if (two_operand_p && (index & elements) != 0)
1262 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1263 else
1264 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1265
1266 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1267 true, GSI_SAME_STMT);
1268 }
1269 else
1270 {
1271 tree cond = NULL_TREE, v0_val;
1272
1273 if (two_operand_p)
1274 {
1275 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1276 build_int_cst (mask_elt_type, elements));
1277 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1278 true, GSI_SAME_STMT);
1279 }
1280
1281 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1282 build_int_cst (mask_elt_type, elements - 1));
1283 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1284 true, GSI_SAME_STMT);
1285
1286 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1287 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1288 true, GSI_SAME_STMT);
1289
1290 if (two_operand_p)
1291 {
1292 tree v1_val;
1293
1294 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1295 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1296 true, GSI_SAME_STMT);
1297
1298 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1299 cond, build_zero_cst (mask_elt_type));
1300 cond = fold_build3 (COND_EXPR, vect_elt_type,
1301 cond, v0_val, v1_val);
1302 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1303 true, GSI_SAME_STMT);
1304 }
1305 else
1306 t = v0_val;
1307 }
1308
1309 CONSTRUCTOR_APPEND_ELT (v, si, t);
1310 }
1311
1312 constr = build_constructor (vect_type, v);
1313 gimple_assign_set_rhs_from_tree (gsi, constr);
1314 update_stmt (gsi_stmt (*gsi));
1315 }
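/* Purely illustrative helper (not used by this pass): the element-wise
   fallback built above, written for two 4-element vectors represented
   as plain arrays.  Mask values are reduced modulo 2 * elements;
   values of elements or more select from the second input, as in
   VEC_PERM_EXPR.  */
static void
example_lowered_vec_perm (const int v0[4], const int v1[4],
			  const unsigned char mask[4], int out[4])
{
  unsigned int i;
  for (i = 0; i < 4; i++)
    {
      unsigned char sel = mask[i] & 7;            /* 2 * elements - 1 */
      out[i] = sel < 4 ? v0[sel] : v1[sel - 4];
    }
}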
1316
1317 /* Process one statement. If we identify a vector operation, expand it. */
1318
1319 static void
1320 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
1321 {
1322 gimple stmt = gsi_stmt (*gsi);
1323 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
1324 enum tree_code code;
1325 enum machine_mode compute_mode;
1326 optab op = NULL;
1327 enum gimple_rhs_class rhs_class;
1328 tree new_rhs;
1329
1330 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1331 return;
1332
1333 code = gimple_assign_rhs_code (stmt);
1334 rhs_class = get_gimple_rhs_class (code);
1335 lhs = gimple_assign_lhs (stmt);
1336
1337 if (code == VEC_PERM_EXPR)
1338 {
1339 lower_vec_perm (gsi);
1340 return;
1341 }
1342
1343 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
1344 return;
1345
1346 rhs1 = gimple_assign_rhs1 (stmt);
1347 type = gimple_expr_type (stmt);
1348 if (rhs_class == GIMPLE_BINARY_RHS)
1349 rhs2 = gimple_assign_rhs2 (stmt);
1350
1351 if (TREE_CODE (type) != VECTOR_TYPE)
1352 return;
1353
1354 if (code == NOP_EXPR
1355 || code == FLOAT_EXPR
1356 || code == FIX_TRUNC_EXPR
1357 || code == VIEW_CONVERT_EXPR)
1358 return;
1359
1360 gcc_assert (code != CONVERT_EXPR);
1361
1362 /* The signedness is determined from the input argument. */
1363 if (code == VEC_UNPACK_FLOAT_HI_EXPR
1364 || code == VEC_UNPACK_FLOAT_LO_EXPR)
1365 type = TREE_TYPE (rhs1);
1366
1367 /* Choose between vector shift/rotate by vector and vector shift/rotate by
1368 scalar. */
1369 if (code == LSHIFT_EXPR
1370 || code == RSHIFT_EXPR
1371 || code == LROTATE_EXPR
1372 || code == RROTATE_EXPR)
1373 {
1374 optab opv;
1375
1376 /* Check whether we have vector <op> {x,x,x,x} where x
1377 could be a scalar variable or a constant. Transform
1378 vector <op> {x,x,x,x} ==> vector <op> scalar. */
1379 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1380 {
1381 tree first;
1382 gimple def_stmt;
1383
1384 if ((TREE_CODE (rhs2) == VECTOR_CST
1385 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
1386 || (TREE_CODE (rhs2) == SSA_NAME
1387 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
1388 && gimple_assign_single_p (def_stmt)
1389 && (first = uniform_vector_p
1390 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
1391 {
1392 gimple_assign_set_rhs2 (stmt, first);
1393 update_stmt (stmt);
1394 rhs2 = first;
1395 }
1396 }
1397
1398 opv = optab_for_tree_code (code, type, optab_vector);
1399 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1400 op = opv;
1401 else
1402 {
1403 op = optab_for_tree_code (code, type, optab_scalar);
1404
1405 /* The rtl expander will expand vector/scalar as vector/vector
1406 if necessary. Don't bother converting the stmt here. */
1407 if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
1408 && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
1409 return;
1410 }
1411 }
1412 else
1413 op = optab_for_tree_code (code, type, optab_default);
1414
1415 /* For widening/narrowing vector operations, the relevant type is that of
1416 the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR is
1417 handled the same way above. */
1418 if (code == WIDEN_SUM_EXPR
1419 || code == VEC_WIDEN_MULT_HI_EXPR
1420 || code == VEC_WIDEN_MULT_LO_EXPR
1421 || code == VEC_UNPACK_HI_EXPR
1422 || code == VEC_UNPACK_LO_EXPR
1423 || code == VEC_PACK_TRUNC_EXPR
1424 || code == VEC_PACK_SAT_EXPR
1425 || code == VEC_PACK_FIX_TRUNC_EXPR
1426 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1427 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1428 type = TREE_TYPE (rhs1);
1429
1430 /* Optabs will try converting a negation into a subtraction, so
1431 look for it as well. TODO: negation of floating-point vectors
1432 might be turned into an exclusive OR toggling the sign bit. */
1433 if (op == NULL
1434 && code == NEGATE_EXPR
1435 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1436 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1437
1438 /* For very wide vectors, try using a smaller vector mode. */
1439 compute_type = type;
1440 if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
1441 {
1442 tree vector_compute_type
1443 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1444 if (vector_compute_type != NULL_TREE
1445 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1446 < TYPE_VECTOR_SUBPARTS (compute_type))
1447 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1448 != CODE_FOR_nothing))
1449 compute_type = vector_compute_type;
1450 }
1451
1452 /* If we are breaking a BLKmode vector into smaller pieces,
1453 type_for_widest_vector_mode has already looked into the optab,
1454 so skip these checks. */
1455 if (compute_type == type)
1456 {
1457 compute_mode = TYPE_MODE (compute_type);
1458 if (VECTOR_MODE_P (compute_mode)
1459 && op != NULL
1460 && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1461 return;
1462 else
1463 /* There is no operation in hardware, so fall back to scalars. */
1464 compute_type = TREE_TYPE (type);
1465 }
1466
1467 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
1468 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1469
1470 /* Leave expression untouched for later expansion. */
1471 if (new_rhs == NULL_TREE)
1472 return;
1473
1474 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1475 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1476 new_rhs);
1477
1478 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1479 way to do that is to change expand_vector_operation and its callees to
1480 return a tree_code, RHS1 and RHS2 instead of a tree. */
1481 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1482 update_stmt (gsi_stmt (*gsi));
1483 }
1484 \f
1485 /* Use this to lower vector operations introduced by the vectorizer,
1486 if it may need the bit-twiddling tricks implemented in this file. */
1487
1488 static bool
1489 gate_expand_vector_operations_ssa (void)
1490 {
1491 return optimize == 0;
1492 }
1493
1494 static unsigned int
1495 expand_vector_operations (void)
1496 {
1497 gimple_stmt_iterator gsi;
1498 basic_block bb;
1499 bool cfg_changed = false;
1500
1501 FOR_EACH_BB (bb)
1502 {
1503 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1504 {
1505 expand_vector_operations_1 (&gsi);
1506 /* ??? If we do not cleanup EH then we will ICE in
1507 verification. But in reality we have created wrong-code
1508 as we did not properly transition EH info and edges to
1509 the piecewise computations. */
1510 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1511 && gimple_purge_dead_eh_edges (bb))
1512 cfg_changed = true;
1513 }
1514 }
1515
1516 return cfg_changed ? TODO_cleanup_cfg : 0;
1517 }
1518
1519 struct gimple_opt_pass pass_lower_vector =
1520 {
1521 {
1522 GIMPLE_PASS,
1523 "veclower", /* name */
1524 gate_expand_vector_operations_ssa, /* gate */
1525 expand_vector_operations, /* execute */
1526 NULL, /* sub */
1527 NULL, /* next */
1528 0, /* static_pass_number */
1529 TV_NONE, /* tv_id */
1530 PROP_cfg, /* properties_required */
1531 0, /* properties_provided */
1532 0, /* properties_destroyed */
1533 0, /* todo_flags_start */
1534 TODO_update_ssa /* todo_flags_finish */
1535 | TODO_verify_ssa
1536 | TODO_verify_stmts | TODO_verify_flow
1537 | TODO_cleanup_cfg
1538 }
1539 };
1540
1541 struct gimple_opt_pass pass_lower_vector_ssa =
1542 {
1543 {
1544 GIMPLE_PASS,
1545 "veclower2", /* name */
1546 0, /* gate */
1547 expand_vector_operations, /* execute */
1548 NULL, /* sub */
1549 NULL, /* next */
1550 0, /* static_pass_number */
1551 TV_NONE, /* tv_id */
1552 PROP_cfg, /* properties_required */
1553 0, /* properties_provided */
1554 0, /* properties_destroyed */
1555 0, /* todo_flags_start */
1556 TODO_update_ssa /* todo_flags_finish */
1557 | TODO_verify_ssa
1558 | TODO_verify_stmts | TODO_verify_flow
1559 | TODO_cleanup_cfg
1560 }
1561 };
1562
1563 #include "gt-tree-vect-generic.h"