re PR middle-end/52584 (Fails to constant fold vector upper/lower half BIT_FIELD_REFs)
gcc/tree-vect-generic.c
/* Lower vector operations to scalar operations.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
#include "diagnostic.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
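/* For example, with a 32-bit TYPE, an 8-bit INNER_TYPE and VALUE 0x80,
   this builds the integer constant 0x80808080.  */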
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
                                      build_vector_type (vector_inner_type,
                                                         nunits));
  return vector_last_type;
}

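/* Type of the per-element worker functions used by expand_vector_piecewise
   and expand_vector_parallel below.  */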
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
                              tree, tree, tree, tree, tree, enum tree_code);

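/* Extract the piece of T described by BITSIZE and BITPOS as a value of type
   TYPE; if BITPOS is null, reinterpret the whole of T instead.  */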
static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
                  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

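/* Piecewise worker: apply the unary operation CODE to the element of A
   selected by BITSIZE and BITPOS.  */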
static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
         tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
         enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

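/* Piecewise worker: apply the binary operation CODE to the elements of A
   and B selected by BITSIZE and BITPOS, extracting from an operand only
   when it really is a vector.  */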
static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
          tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements.

   The returned expression has a signed integer type whose size
   equals the size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
            tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
                (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
                          fold_build2 (code, boolean_type_node, a, b),
                          build_int_cst (comp_type, -1),
                          build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
           (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
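/* For example, for V4QI operands computed in a 32-bit word, LOW_BITS is
   0x7f7f7f7f and HIGH_BITS is 0x80808080: clearing the top bit of every
   element before the word-wide operation keeps carries from crossing
   element boundaries, and the final XOR puts the correct sign bits back.  */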
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
               tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
               enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

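/* Word-at-a-time counterpart of do_unop for NEGATE_EXPR, implementing
   -b = (0x80808080 - (b & 0x7f7f7f7f)) ^ (~b & 0x80808080) from the
   comment above.  */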
static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
           tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
           tree bitsize ATTRIBUTE_UNUSED,
           enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
                         tree type, tree inner_type,
                         tree a, tree b, enum tree_code code)
{
  VEC(constructor_elt,gc) *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
              / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
                "vector operation will be expanded in parallel");

  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
      ce->index = NULL_TREE;
      ce->value = result;
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
                        tree a, tree b,
                        enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
                                        word_type, TREE_TYPE (word_type),
                                        a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
                  "vector operation will be expanded with a "
                  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
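/* For example, a V8QI addition on a 64-bit target satisfies both conditions
   (eight one-byte elements fit in a word), so it is lowered with the
   bit-twiddling F_PARALLEL; a V2SI addition is lowered element by element
   with F instead.  */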
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
                        elem_op_func f, elem_op_func f_parallel,
                        tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
                       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
                                   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
                                    type, TREE_TYPE (type),
                                    a, b, code);
}

/* Check whether vector VEC consists of all equal elements and whether
   the number of elements corresponds to the type of VEC.  Return the
   first element of the vector, or NULL_TREE if the vector is not
   uniform.  */
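/* For example, the constant vector {4, 4, 4, 4} is uniform and 4 is
   returned, whereas {4, 4, 4, 8} yields NULL_TREE.  */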
static tree
uniform_vector_p (tree vec)
{
  tree first, t, els;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      els = TREE_VECTOR_CST_ELTS (vec);
      first = TREE_VALUE (els);
      els = TREE_CHAIN (els);

      for (t = els; t; t = TREE_CHAIN (t))
        if (!operand_equal_p (first, TREE_VALUE (t), 0))
          return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
          if (!operand_equal_p (first, t, 0))
            return NULL_TREE;
        }
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
        return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}

/* Try to expand the vector comparison expression OP0 CODE OP1 by querying
   the optab whether the following expression:
     VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded; if it cannot, lower the comparison piecewise.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
                                 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

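/* Lower the vector operation of ASSIGN, an operation CODE on vectors of
   type TYPE, computing pieces of type COMPUTE_TYPE.  Returns the lowered
   replacement for the RHS, or NULL_TREE if the statement should be left
   untouched for later expansion.  */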
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                         gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
                                         gimple_assign_rhs1 (assign),
                                         gimple_assign_rhs2 (assign), code);
        break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
                                         gimple_assign_rhs1 (assign),
                                         NULL_TREE, code);
        break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
                                       gimple_assign_rhs1 (assign),
                                       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
                                       gimple_assign_rhs1 (assign),
                                       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
        {
          tree rhs1 = gimple_assign_rhs1 (assign);
          tree rhs2 = gimple_assign_rhs2 (assign);

          return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
        }
      default:
        break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
                                    gimple_assign_rhs1 (assign),
                                    gimple_assign_rhs2 (assign), code);
}
\f
/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */
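/* For example, if TYPE is a 32-bit float type and the target implements OP
   for both V2SFmode and V4SFmode, the V4SF vector type is returned.  */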

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
        && optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}


/* Build a reference to the element of the vector VECT.  Function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will
   be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
         we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
        {
          index &= elements - 1;
          idx = build_int_cst (TREE_TYPE (idx), index);
        }

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (vect);
          if (is_gimple_assign (def_stmt)
              && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
                  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
            vect = gimple_assign_rhs1 (def_stmt);
        }

      if (TREE_CODE (vect) == VECTOR_CST)
        {
          unsigned i;
          tree vals = TREE_VECTOR_CST_ELTS (vect);
          for (i = 0; vals; vals = TREE_CHAIN (vals), ++i)
            if (i == index)
              return TREE_VALUE (vals);
          return build_zero_cst (vect_elt_type);
        }
      else if (TREE_CODE (vect) == CONSTRUCTOR)
        {
          unsigned i;
          tree elt_i, elt_v;

          FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (vect), i, elt_i, elt_v)
            if (operand_equal_p (elt_i, idx, 0))
              return elt_v;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
          tree size = TYPE_SIZE (vect_elt_type);
          tree pos = fold_build2 (MULT_EXPR, TREE_TYPE (idx), idx, size);
          return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of elements.  */
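/* For example, VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,5,2,7}> selects
   elements {a, f, c, h}.  */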

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  VEC(constructor_elt,gc) *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
      tree vals = TREE_VECTOR_CST_ELTS (mask);

      for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
        sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals)) & (2 * elements - 1);

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
        return;
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  v = VEC_alloc (constructor_elt, gc, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
          unsigned HOST_WIDE_INT index;

          index = TREE_INT_CST_LOW (i_val);
          if (!host_integerp (i_val, 1) || index >= elements)
            i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        true, GSI_SAME_STMT);
        }
      else
        {
          tree cond = NULL_TREE, v0_val;

          if (two_operand_p)
            {
              cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                                  build_int_cst (mask_elt_type, elements));
              cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
            }

          i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
                               build_int_cst (mask_elt_type, elements - 1));
          i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
                                            true, GSI_SAME_STMT);

          v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
          v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

          if (two_operand_p)
            {
              tree v1_val;

              v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
              v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
                                                 true, GSI_SAME_STMT);

              cond = fold_build2 (EQ_EXPR, boolean_type_node,
                                  cond, build_zero_cst (mask_elt_type));
              cond = fold_build3 (COND_EXPR, vect_elt_type,
                                  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
                                            true, GSI_SAME_STMT);
            }
          else
            t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, si, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = NULL;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
               && (first = uniform_vector_p (rhs2)) != NULL_TREE)
              || (TREE_CODE (rhs2) == SSA_NAME
                  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
                  && gimple_assign_single_p (def_stmt)
                  && (first = uniform_vector_p
                      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        op = opv;
      else
        {
          op = optab_for_tree_code (code, type, optab_scalar);

          /* The rtl expander will expand vector/scalar as vector/vector
             if necessary.  Don't bother converting the stmt here.  */
          if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
              && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
            return;
        }
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     handled the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == NULL
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
          && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
              < TYPE_VECTOR_SUBPARTS (compute_type))
          && (optab_handler (op, TYPE_MODE (vector_compute_type))
              != CODE_FOR_nothing))
        compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode)
          && op != NULL
          && optab_handler (op, compute_mode) != CODE_FOR_nothing)
        return;
      else
        /* There is no operation in hardware, so fall back to scalars.  */
        compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}
\f
/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations_ssa (void)
{
  return optimize == 0;
}

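/* Main entry point of the pass: walk every statement of every basic block
   and lower the vector operations found there.  Returns TODO_cleanup_cfg
   when dead EH edges had to be purged.  */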
static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          expand_vector_operations_1 (&gsi);
          /* ??? If we do not cleanup EH then we will ICE in
             verification.  But in reality we have created wrong-code
             as we did not properly transition EH info and edges to
             the piecewise computations.  */
          if (maybe_clean_eh_stmt (gsi_stmt (gsi))
              && gimple_purge_dead_eh_edges (bb))
            cfg_changed = true;
        }
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",                           /* name */
  gate_expand_vector_operations_ssa,    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",                          /* name */
  0,                                    /* gate */
  expand_vector_operations,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_cfg,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa                       /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"