1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "tree-vectorizer.h"
67 #include "cgraph.h"
68 #include "builtins.h"
69
70 /* For lang_hooks.types.type_for_mode. */
71 #include "langhooks.h"
72
73 /* Return the vectorized type for the given statement. */
74
75 tree
76 stmt_vectype (struct _stmt_vec_info *stmt_info)
77 {
78 return STMT_VINFO_VECTYPE (stmt_info);
79 }
80
81 /* Return TRUE iff the given statement is in an inner loop relative to
82 the loop being vectorized. */
83 bool
84 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
85 {
86 gimple stmt = STMT_VINFO_STMT (stmt_info);
87 basic_block bb = gimple_bb (stmt);
88 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
89 struct loop* loop;
90
91 if (!loop_vinfo)
92 return false;
93
94 loop = LOOP_VINFO_LOOP (loop_vinfo);
95
96 return (bb->loop_father == loop->inner);
97 }
98
99 /* Record the cost of a statement, either by directly informing the
100 target model or by saving it in a vector for later processing.
101 Return a preliminary estimate of the statement's cost. */
102
103 unsigned
104 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
105 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
106 int misalign, enum vect_cost_model_location where)
107 {
108 if (body_cost_vec)
109 {
110 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
111 add_stmt_info_to_vec (body_cost_vec, count, kind,
112 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
113 misalign);
114 return (unsigned)
115 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116
117 }
118 else
119 {
120 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
121 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
122 void *target_cost_data;
123
124 if (loop_vinfo)
125 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
126 else
127 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
128
129 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
130 misalign, where);
131 }
132 }
133
134 /* Return a variable of type ELEM_TYPE[NELEMS]. */
135
136 static tree
137 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
138 {
139 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
140 "vect_array");
141 }
142
143 /* ARRAY is an array of vectors created by create_vector_array.
144 Return an SSA_NAME for the vector in index N. The reference
145 is part of the vectorization of STMT and the vector is associated
146 with scalar destination SCALAR_DEST. */
147
148 static tree
149 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
150 tree array, unsigned HOST_WIDE_INT n)
151 {
152 tree vect_type, vect, vect_name, array_ref;
153 gimple new_stmt;
154
155 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
156 vect_type = TREE_TYPE (TREE_TYPE (array));
157 vect = vect_create_destination_var (scalar_dest, vect_type);
158 array_ref = build4 (ARRAY_REF, vect_type, array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
161
162 new_stmt = gimple_build_assign (vect, array_ref);
163 vect_name = make_ssa_name (vect, new_stmt);
164 gimple_assign_set_lhs (new_stmt, vect_name);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
166
167 return vect_name;
168 }
169
170 /* ARRAY is an array of vectors created by create_vector_array.
171 Emit code to store SSA_NAME VECT in index N of the array.
172 The store is part of the vectorization of STMT. */
173
174 static void
175 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
176 tree array, unsigned HOST_WIDE_INT n)
177 {
178 tree array_ref;
179 gimple new_stmt;
180
181 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
182 build_int_cst (size_type_node, n),
183 NULL_TREE, NULL_TREE);
184
185 new_stmt = gimple_build_assign (array_ref, vect);
186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
187 }
188
189 /* PTR is a pointer to an array of type TYPE. Return a representation
190 of *PTR. The memory reference replaces those in FIRST_DR
191 (and its group). */
192
193 static tree
194 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
195 {
196 tree mem_ref, alias_ptr_type;
197
198 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
199 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
200 /* Arrays have the same alignment as their type. */
201 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
202 return mem_ref;
203 }
204
205 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
206
207 /* Function vect_mark_relevant.
208
209 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
210
211 static void
212 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
213 enum vect_relevant relevant, bool live_p,
214 bool used_in_pattern)
215 {
216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
217 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
218 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
219 gimple pattern_stmt;
220
221 if (dump_enabled_p ())
222 dump_printf_loc (MSG_NOTE, vect_location,
223 "mark relevant %d, live %d.\n", relevant, live_p);
224
225 /* If this stmt is an original stmt in a pattern, we might need to mark its
226 related pattern stmt instead of the original stmt. However, such stmts
 227 may have their own uses that are not in any pattern; in such cases the
228 stmt itself should be marked. */
229 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
230 {
231 bool found = false;
232 if (!used_in_pattern)
233 {
234 imm_use_iterator imm_iter;
235 use_operand_p use_p;
236 gimple use_stmt;
237 tree lhs;
238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
240
241 if (is_gimple_assign (stmt))
242 lhs = gimple_assign_lhs (stmt);
243 else
244 lhs = gimple_call_lhs (stmt);
245
 246 /* This use is outside the pattern.  If the LHS has other uses that are
 247 pattern uses, we should mark the stmt itself, and not the pattern
 248 stmt.  */
249 if (lhs && TREE_CODE (lhs) == SSA_NAME)
250 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
251 {
252 if (is_gimple_debug (USE_STMT (use_p)))
253 continue;
254 use_stmt = USE_STMT (use_p);
255
256 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
257 continue;
258
259 if (vinfo_for_stmt (use_stmt)
260 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
261 {
262 found = true;
263 break;
264 }
265 }
266 }
267
268 if (!found)
269 {
270 /* This is the last stmt in a sequence that was detected as a
271 pattern that can potentially be vectorized. Don't mark the stmt
272 as relevant/live because it's not going to be vectorized.
273 Instead mark the pattern-stmt that replaces it. */
274
275 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
276
277 if (dump_enabled_p ())
278 dump_printf_loc (MSG_NOTE, vect_location,
279 "last stmt in pattern. don't mark"
280 " relevant/live.\n");
281 stmt_info = vinfo_for_stmt (pattern_stmt);
282 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
283 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
284 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
285 stmt = pattern_stmt;
286 }
287 }
288
289 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
290 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
291 STMT_VINFO_RELEVANT (stmt_info) = relevant;
292
293 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
294 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
295 {
296 if (dump_enabled_p ())
297 dump_printf_loc (MSG_NOTE, vect_location,
298 "already marked relevant/live.\n");
299 return;
300 }
301
302 worklist->safe_push (stmt);
303 }
304
305
306 /* Function vect_stmt_relevant_p.
307
308 Return true if STMT in loop that is represented by LOOP_VINFO is
309 "relevant for vectorization".
310
311 A stmt is considered "relevant for vectorization" if:
312 - it has uses outside the loop.
313 - it has vdefs (it alters memory).
 314 - it is a control stmt in the loop (except for the loop exit condition).
315
316 CHECKME: what other side effects would the vectorizer allow? */
317
318 static bool
319 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
320 enum vect_relevant *relevant, bool *live_p)
321 {
322 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
323 ssa_op_iter op_iter;
324 imm_use_iterator imm_iter;
325 use_operand_p use_p;
326 def_operand_p def_p;
327
328 *relevant = vect_unused_in_scope;
329 *live_p = false;
330
331 /* cond stmt other than loop exit cond. */
332 if (is_ctrl_stmt (stmt)
333 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
334 != loop_exit_ctrl_vec_info_type)
335 *relevant = vect_used_in_scope;
336
337 /* changing memory. */
338 if (gimple_code (stmt) != GIMPLE_PHI)
339 if (gimple_vdef (stmt))
340 {
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE, vect_location,
343 "vec_stmt_relevant_p: stmt has vdefs.\n");
344 *relevant = vect_used_in_scope;
345 }
346
347 /* uses outside the loop. */
348 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
349 {
350 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
351 {
352 basic_block bb = gimple_bb (USE_STMT (use_p));
353 if (!flow_bb_inside_loop_p (loop, bb))
354 {
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: used out of loop.\n");
358
359 if (is_gimple_debug (USE_STMT (use_p)))
360 continue;
361
362 /* We expect all such uses to be in the loop exit phis
 363 (because of loop-closed SSA form).  */
364 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
365 gcc_assert (bb == single_exit (loop)->dest);
366
367 *live_p = true;
368 }
369 }
370 }
371
372 return (*live_p || *relevant);
373 }
374
375
376 /* Function exist_non_indexing_operands_for_use_p
377
378 USE is one of the uses attached to STMT. Check if USE is
379 used in STMT for anything other than indexing an array. */
380
381 static bool
382 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
383 {
384 tree operand;
385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
386
387 /* USE corresponds to some operand in STMT. If there is no data
388 reference in STMT, then any operand that corresponds to USE
389 is not indexing an array. */
390 if (!STMT_VINFO_DATA_REF (stmt_info))
391 return true;
392
 393 /* STMT has a data_ref.  FORNOW this means that it is in one of
394 the following forms:
395 -1- ARRAY_REF = var
396 -2- var = ARRAY_REF
397 (This should have been verified in analyze_data_refs).
398
399 'var' in the second case corresponds to a def, not a use,
400 so USE cannot correspond to any operands that are not used
401 for array indexing.
402
403 Therefore, all we need to check is if STMT falls into the
404 first case, and whether var corresponds to USE. */
405
406 if (!gimple_assign_copy_p (stmt))
407 {
408 if (is_gimple_call (stmt)
409 && gimple_call_internal_p (stmt))
410 switch (gimple_call_internal_fn (stmt))
411 {
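/* For IFN_MASK_STORE the value being stored is argument 3 and the mask
   is argument 2; for IFN_MASK_LOAD only the mask (argument 2) is a
   non-indexing operand.  The address and alignment arguments are used
   for indexing only.  */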
412 case IFN_MASK_STORE:
413 operand = gimple_call_arg (stmt, 3);
414 if (operand == use)
415 return true;
416 /* FALLTHRU */
417 case IFN_MASK_LOAD:
418 operand = gimple_call_arg (stmt, 2);
419 if (operand == use)
420 return true;
421 break;
422 default:
423 break;
424 }
425 return false;
426 }
427
428 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
429 return false;
430 operand = gimple_assign_rhs1 (stmt);
431 if (TREE_CODE (operand) != SSA_NAME)
432 return false;
433
434 if (operand == use)
435 return true;
436
437 return false;
438 }
439
440
441 /*
442 Function process_use.
443
444 Inputs:
445 - a USE in STMT in a loop represented by LOOP_VINFO
446 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
447 that defined USE. This is done by calling mark_relevant and passing it
448 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
449 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
450 be performed.
451
452 Outputs:
453 Generally, LIVE_P and RELEVANT are used to define the liveness and
454 relevance info of the DEF_STMT of this USE:
455 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
456 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
457 Exceptions:
458 - case 1: If USE is used only for address computations (e.g. array indexing),
459 which does not need to be directly vectorized, then the liveness/relevance
460 of the respective DEF_STMT is left unchanged.
461 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 462 skip DEF_STMT because it has already been processed.
463 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
464 be modified accordingly.
465
466 Return true if everything is as expected. Return false otherwise. */
467
468 static bool
469 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
470 enum vect_relevant relevant, vec<gimple> *worklist,
471 bool force)
472 {
473 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
474 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
475 stmt_vec_info dstmt_vinfo;
476 basic_block bb, def_bb;
477 tree def;
478 gimple def_stmt;
479 enum vect_def_type dt;
480
481 /* case 1: we are only interested in uses that need to be vectorized. Uses
482 that are used for address computation are not considered relevant. */
483 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
484 return true;
485
486 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
487 {
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
490 "not vectorized: unsupported use in stmt.\n");
491 return false;
492 }
493
494 if (!def_stmt || gimple_nop_p (def_stmt))
495 return true;
496
497 def_bb = gimple_bb (def_stmt);
498 if (!flow_bb_inside_loop_p (loop, def_bb))
499 {
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
502 return true;
503 }
504
505 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
506 DEF_STMT must have already been processed, because this should be the
507 only way that STMT, which is a reduction-phi, was put in the worklist,
508 as there should be no other uses for DEF_STMT in the loop. So we just
509 check that everything is as expected, and we are done. */
510 dstmt_vinfo = vinfo_for_stmt (def_stmt);
511 bb = gimple_bb (stmt);
512 if (gimple_code (stmt) == GIMPLE_PHI
513 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
514 && gimple_code (def_stmt) != GIMPLE_PHI
515 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
516 && bb->loop_father == def_bb->loop_father)
517 {
518 if (dump_enabled_p ())
519 dump_printf_loc (MSG_NOTE, vect_location,
520 "reduc-stmt defining reduc-phi in the same nest.\n");
521 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
522 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
523 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
524 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
525 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
526 return true;
527 }
528
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = def_stmt
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
537 {
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
541
542 switch (relevant)
543 {
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
548
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
553
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
558
559 case vect_used_in_scope:
560 break;
561
562 default:
563 gcc_unreachable ();
564 }
565 }
566
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
569 ...
570 inner-loop:
571 d = def_stmt
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
575 {
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
579
580 switch (relevant)
581 {
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
587
588 case vect_used_by_reduction:
589 relevant = vect_used_in_outer_by_reduction;
590 break;
591
592 case vect_used_in_scope:
593 relevant = vect_used_in_outer;
594 break;
595
596 default:
597 gcc_unreachable ();
598 }
599 }
600
601 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
602 is_pattern_stmt_p (stmt_vinfo));
603 return true;
604 }
605
606
607 /* Function vect_mark_stmts_to_be_vectorized.
608
609 Not all stmts in the loop need to be vectorized. For example:
610
611 for i...
612 for j...
613 1. T0 = i + j
614 2. T1 = a[T0]
615
616 3. j = j + 1
617
 618 Stmts 1 and 3 do not need to be vectorized, because loop control and
619 addressing of vectorized data-refs are handled differently.
620
621 This pass detects such stmts. */
622
623 bool
624 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
625 {
626 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
627 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
628 unsigned int nbbs = loop->num_nodes;
629 gimple_stmt_iterator si;
630 gimple stmt;
631 unsigned int i;
632 stmt_vec_info stmt_vinfo;
633 basic_block bb;
634 gimple phi;
635 bool live_p;
636 enum vect_relevant relevant, tmp_relevant;
637 enum vect_def_type def_type;
638
639 if (dump_enabled_p ())
640 dump_printf_loc (MSG_NOTE, vect_location,
641 "=== vect_mark_stmts_to_be_vectorized ===\n");
642
643 auto_vec<gimple, 64> worklist;
644
645 /* 1. Init worklist. */
646 for (i = 0; i < nbbs; i++)
647 {
648 bb = bbs[i];
649 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
650 {
651 phi = gsi_stmt (si);
652 if (dump_enabled_p ())
653 {
654 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
656 dump_printf (MSG_NOTE, "\n");
657 }
658
659 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
660 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
661 }
662 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
663 {
664 stmt = gsi_stmt (si);
665 if (dump_enabled_p ())
666 {
667 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
669 dump_printf (MSG_NOTE, "\n");
670 }
671
672 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
673 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
674 }
675 }
676
677 /* 2. Process_worklist */
678 while (worklist.length () > 0)
679 {
680 use_operand_p use_p;
681 ssa_op_iter iter;
682
683 stmt = worklist.pop ();
684 if (dump_enabled_p ())
685 {
686 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
688 dump_printf (MSG_NOTE, "\n");
689 }
690
691 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
692 (DEF_STMT) as relevant/irrelevant and live/dead according to the
693 liveness and relevance properties of STMT. */
694 stmt_vinfo = vinfo_for_stmt (stmt);
695 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
696 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
697
698 /* Generally, the liveness and relevance properties of STMT are
699 propagated as is to the DEF_STMTs of its USEs:
700 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
701 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
702
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the liveness/relevance as follows:
705 live_p = false
706 relevant = vect_used_by_reduction
707 This is because we distinguish between two kinds of relevant stmts -
708 those that are used by a reduction computation, and those that are
709 (also) used by a regular computation. This allows us later on to
710 identify stmts that are used solely by a reduction, and therefore the
711 order of the results that they produce does not have to be kept. */
712
713 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
714 tmp_relevant = relevant;
715 switch (def_type)
716 {
717 case vect_reduction_def:
718 switch (tmp_relevant)
719 {
720 case vect_unused_in_scope:
721 relevant = vect_used_by_reduction;
722 break;
723
724 case vect_used_by_reduction:
725 if (gimple_code (stmt) == GIMPLE_PHI)
726 break;
727 /* fall through */
728
729 default:
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of reduction.\n");
733 return false;
734 }
735
736 live_p = false;
737 break;
738
739 case vect_nested_cycle:
740 if (tmp_relevant != vect_unused_in_scope
741 && tmp_relevant != vect_used_in_outer_by_reduction
742 && tmp_relevant != vect_used_in_outer)
743 {
744 if (dump_enabled_p ())
745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
746 "unsupported use of nested cycle.\n");
747
748 return false;
749 }
750
751 live_p = false;
752 break;
753
754 case vect_double_reduction_def:
755 if (tmp_relevant != vect_unused_in_scope
756 && tmp_relevant != vect_used_by_reduction)
757 {
758 if (dump_enabled_p ())
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
760 "unsupported use of double reduction.\n");
761
762 return false;
763 }
764
765 live_p = false;
766 break;
767
768 default:
769 break;
770 }
771
772 if (is_pattern_stmt_p (stmt_vinfo))
773 {
774 /* Pattern statements are not inserted into the code, so
775 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
776 have to scan the RHS or function arguments instead. */
777 if (is_gimple_assign (stmt))
778 {
779 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
780 tree op = gimple_assign_rhs1 (stmt);
781
782 i = 1;
783 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
784 {
785 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
786 live_p, relevant, &worklist, false)
787 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
788 live_p, relevant, &worklist, false))
789 return false;
790 i = 2;
791 }
792 for (; i < gimple_num_ops (stmt); i++)
793 {
794 op = gimple_op (stmt, i);
795 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
796 &worklist, false))
797 return false;
798 }
799 }
800 else if (is_gimple_call (stmt))
801 {
802 for (i = 0; i < gimple_call_num_args (stmt); i++)
803 {
804 tree arg = gimple_call_arg (stmt, i);
805 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
806 &worklist, false))
807 return false;
808 }
809 }
810 }
811 else
812 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
813 {
814 tree op = USE_FROM_PTR (use_p);
815 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
816 &worklist, false))
817 return false;
818 }
819
820 if (STMT_VINFO_GATHER_P (stmt_vinfo))
821 {
822 tree off;
823 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
824 gcc_assert (decl);
825 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
826 &worklist, true))
827 return false;
828 }
829 } /* while worklist */
830
831 return true;
832 }
833
834
835 /* Function vect_model_simple_cost.
836
837 Models cost for simple operations, i.e. those that only emit ncopies of a
838 single op. Right now, this does not account for multiple insns that could
839 be generated for the single vector op. We will handle that shortly. */
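/* For instance, in the code below a vectorized operation whose two
   operands are both loop invariants is charged NCOPIES vector_stmt
   costs inside the loop body plus one prologue vector_stmt cost per
   invariant operand.  */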
840
841 void
842 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
843 enum vect_def_type *dt,
844 stmt_vector_for_cost *prologue_cost_vec,
845 stmt_vector_for_cost *body_cost_vec)
846 {
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
849
850 /* The SLP costs were already calculated during SLP tree build. */
851 if (PURE_SLP_STMT (stmt_info))
852 return;
853
 854 /* FORNOW: Assuming maximum 2 args per stmt.  */
855 for (i = 0; i < 2; i++)
856 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
857 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
858 stmt_info, 0, vect_prologue);
859
860 /* Pass the inside-of-loop statements to the target-specific cost model. */
861 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
862 stmt_info, 0, vect_body);
863
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_NOTE, vect_location,
866 "vect_model_simple_cost: inside_cost = %d, "
867 "prologue_cost = %d .\n", inside_cost, prologue_cost);
868 }
869
870
871 /* Model cost for type demotion and promotion operations. PWR is normally
872 zero for single-step promotions and demotions. It will be one if
873 two-step promotion/demotion is required, and so on. Each additional
874 step doubles the number of instructions required. */
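/* For example, with the loop below (which charges vect_pow2 (tmp)
   vec_promote_demote stmts per step), a two-step promotion (PWR == 1)
   is counted as 2 + 4 = 6 vector stmts and a two-step demotion as
   1 + 2 = 3.  */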
875
876 static void
877 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
878 enum vect_def_type *dt, int pwr)
879 {
880 int i, tmp;
881 int inside_cost = 0, prologue_cost = 0;
882 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
883 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
884 void *target_cost_data;
885
886 /* The SLP costs were already calculated during SLP tree build. */
887 if (PURE_SLP_STMT (stmt_info))
888 return;
889
890 if (loop_vinfo)
891 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
892 else
893 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
894
895 for (i = 0; i < pwr + 1; i++)
896 {
897 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
898 (i + 1) : i;
899 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
900 vec_promote_demote, stmt_info, 0,
901 vect_body);
902 }
903
 904 /* FORNOW: Assuming maximum 2 args per stmt.  */
905 for (i = 0; i < 2; i++)
906 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
907 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
908 stmt_info, 0, vect_prologue);
909
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_promotion_demotion_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
914 }
915
916 /* Function vect_cost_group_size
917
918 For grouped load or store, return the group_size only if it is the first
919 load or store of a group, else return 1. This ensures that group size is
920 only returned once per group. */
921
922 static int
923 vect_cost_group_size (stmt_vec_info stmt_info)
924 {
925 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
926
927 if (first_stmt == STMT_VINFO_STMT (stmt_info))
928 return GROUP_SIZE (stmt_info);
929
930 return 1;
931 }
932
933
934 /* Function vect_model_store_cost
935
936 Models cost for stores. In the case of grouped accesses, one access
937 has the overhead of the grouped access attributed to it. */
938
939 void
940 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
941 bool store_lanes_p, enum vect_def_type dt,
942 slp_tree slp_node,
943 stmt_vector_for_cost *prologue_cost_vec,
944 stmt_vector_for_cost *body_cost_vec)
945 {
946 int group_size;
947 unsigned int inside_cost = 0, prologue_cost = 0;
948 struct data_reference *first_dr;
949 gimple first_stmt;
950
951 /* The SLP costs were already calculated during SLP tree build. */
952 if (PURE_SLP_STMT (stmt_info))
953 return;
954
955 if (dt == vect_constant_def || dt == vect_external_def)
956 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
957 stmt_info, 0, vect_prologue);
958
959 /* Grouped access? */
960 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
961 {
962 if (slp_node)
963 {
964 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
965 group_size = 1;
966 }
967 else
968 {
969 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
970 group_size = vect_cost_group_size (stmt_info);
971 }
972
973 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
974 }
975 /* Not a grouped access. */
976 else
977 {
978 group_size = 1;
979 first_dr = STMT_VINFO_DATA_REF (stmt_info);
980 }
981
982 /* We assume that the cost of a single store-lanes instruction is
983 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
984 access is instead being provided by a permute-and-store operation,
985 include the cost of the permutes. */
986 if (!store_lanes_p && group_size > 1)
987 {
 988 /* Uses high and low interleave operations, or shuffle operations,
 989 for each needed permute.  */
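/* E.g., for GROUP_SIZE == 4 each copy is charged
   ceil_log2 (4) * 4 == 8 vec_perm stmts.  */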
990 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
991 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
992 stmt_info, 0, vect_body);
993
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_NOTE, vect_location,
996 "vect_model_store_cost: strided group_size = %d .\n",
997 group_size);
998 }
999
1000 /* Costs of the stores. */
1001 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1002
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_NOTE, vect_location,
1005 "vect_model_store_cost: inside_cost = %d, "
1006 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1007 }
1008
1009
1010 /* Calculate cost of DR's memory access. */
1011 void
1012 vect_get_store_cost (struct data_reference *dr, int ncopies,
1013 unsigned int *inside_cost,
1014 stmt_vector_for_cost *body_cost_vec)
1015 {
1016 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1017 gimple stmt = DR_STMT (dr);
1018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1019
1020 switch (alignment_support_scheme)
1021 {
1022 case dr_aligned:
1023 {
1024 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1025 vector_store, stmt_info, 0,
1026 vect_body);
1027
1028 if (dump_enabled_p ())
1029 dump_printf_loc (MSG_NOTE, vect_location,
1030 "vect_model_store_cost: aligned.\n");
1031 break;
1032 }
1033
1034 case dr_unaligned_supported:
1035 {
1036 /* Here, we assign an additional cost for the unaligned store. */
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 unaligned_store, stmt_info,
1039 DR_MISALIGNMENT (dr), vect_body);
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE, vect_location,
1042 "vect_model_store_cost: unaligned supported by "
1043 "hardware.\n");
1044 break;
1045 }
1046
1047 case dr_unaligned_unsupported:
1048 {
1049 *inside_cost = VECT_MAX_COST;
1050
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1053 "vect_model_store_cost: unsupported access.\n");
1054 break;
1055 }
1056
1057 default:
1058 gcc_unreachable ();
1059 }
1060 }
1061
1062
1063 /* Function vect_model_load_cost
1064
1065 Models cost for loads. In the case of grouped accesses, the last access
1066 has the overhead of the grouped access attributed to it. Since unaligned
1067 accesses are supported for loads, we also account for the costs of the
1068 access scheme chosen. */
1069
1070 void
1071 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1072 bool load_lanes_p, slp_tree slp_node,
1073 stmt_vector_for_cost *prologue_cost_vec,
1074 stmt_vector_for_cost *body_cost_vec)
1075 {
1076 int group_size;
1077 gimple first_stmt;
1078 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1079 unsigned int inside_cost = 0, prologue_cost = 0;
1080
1081 /* The SLP costs were already calculated during SLP tree build. */
1082 if (PURE_SLP_STMT (stmt_info))
1083 return;
1084
1085 /* Grouped accesses? */
1086 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1087 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1088 {
1089 group_size = vect_cost_group_size (stmt_info);
1090 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1091 }
1092 /* Not a grouped access. */
1093 else
1094 {
1095 group_size = 1;
1096 first_dr = dr;
1097 }
1098
1099 /* We assume that the cost of a single load-lanes instruction is
1100 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1101 access is instead being provided by a load-and-permute operation,
1102 include the cost of the permutes. */
1103 if (!load_lanes_p && group_size > 1)
1104 {
1105 /* Uses even and odd extract operations, or shuffle operations,
1106 for each needed permute.  */
1107 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1108 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1109 stmt_info, 0, vect_body);
1110
1111 if (dump_enabled_p ())
1112 dump_printf_loc (MSG_NOTE, vect_location,
1113 "vect_model_load_cost: strided group_size = %d .\n",
1114 group_size);
1115 }
1116
1117 /* The loads themselves. */
1118 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1119 {
1120 /* N scalar loads plus gathering them into a vector. */
1121 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1122 inside_cost += record_stmt_cost (body_cost_vec,
1123 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1124 scalar_load, stmt_info, 0, vect_body);
1125 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1126 stmt_info, 0, vect_body);
1127 }
1128 else
1129 vect_get_load_cost (first_dr, ncopies,
1130 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1131 || group_size > 1 || slp_node),
1132 &inside_cost, &prologue_cost,
1133 prologue_cost_vec, body_cost_vec, true);
1134
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_NOTE, vect_location,
1137 "vect_model_load_cost: inside_cost = %d, "
1138 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1139 }
1140
1141
1142 /* Calculate cost of DR's memory access. */
1143 void
1144 vect_get_load_cost (struct data_reference *dr, int ncopies,
1145 bool add_realign_cost, unsigned int *inside_cost,
1146 unsigned int *prologue_cost,
1147 stmt_vector_for_cost *prologue_cost_vec,
1148 stmt_vector_for_cost *body_cost_vec,
1149 bool record_prologue_costs)
1150 {
1151 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1152 gimple stmt = DR_STMT (dr);
1153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1154
1155 switch (alignment_support_scheme)
1156 {
1157 case dr_aligned:
1158 {
1159 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1160 stmt_info, 0, vect_body);
1161
1162 if (dump_enabled_p ())
1163 dump_printf_loc (MSG_NOTE, vect_location,
1164 "vect_model_load_cost: aligned.\n");
1165
1166 break;
1167 }
1168 case dr_unaligned_supported:
1169 {
1170 /* Here, we assign an additional cost for the unaligned load. */
1171 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1172 unaligned_load, stmt_info,
1173 DR_MISALIGNMENT (dr), vect_body);
1174
1175 if (dump_enabled_p ())
1176 dump_printf_loc (MSG_NOTE, vect_location,
1177 "vect_model_load_cost: unaligned supported by "
1178 "hardware.\n");
1179
1180 break;
1181 }
1182 case dr_explicit_realign:
1183 {
1184 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1185 vector_load, stmt_info, 0, vect_body);
1186 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1187 vec_perm, stmt_info, 0, vect_body);
1188
1189 /* FIXME: If the misalignment remains fixed across the iterations of
1190 the containing loop, the following cost should be added to the
1191 prologue costs. */
1192 if (targetm.vectorize.builtin_mask_for_load)
1193 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1194 stmt_info, 0, vect_body);
1195
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: explicit realign\n");
1199
1200 break;
1201 }
1202 case dr_explicit_realign_optimized:
1203 {
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: unaligned software "
1207 "pipelined.\n");
1208
1209 /* The unaligned software pipeline has a load of an address, an initial
1210 load, and possibly a mask operation to "prime" the loop.  However,
1211 if this is an access in a group of loads that provide a grouped
1212 access, then the above cost should only be considered for one
1213 access in the group. Inside the loop, there is a load op
1214 and a realignment op. */
1215
1216 if (add_realign_cost && record_prologue_costs)
1217 {
1218 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1219 vector_stmt, stmt_info,
1220 0, vect_prologue);
1221 if (targetm.vectorize.builtin_mask_for_load)
1222 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1223 vector_stmt, stmt_info,
1224 0, vect_prologue);
1225 }
1226
1227 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1228 stmt_info, 0, vect_body);
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1230 stmt_info, 0, vect_body);
1231
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location,
1234 "vect_model_load_cost: explicit realign optimized"
1235 "\n");
1236
1237 break;
1238 }
1239
1240 case dr_unaligned_unsupported:
1241 {
1242 *inside_cost = VECT_MAX_COST;
1243
1244 if (dump_enabled_p ())
1245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1246 "vect_model_load_cost: unsupported access.\n");
1247 break;
1248 }
1249
1250 default:
1251 gcc_unreachable ();
1252 }
1253 }
1254
1255 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1256 the loop preheader for the vectorized stmt STMT. */
1257
1258 static void
1259 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1260 {
1261 if (gsi)
1262 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1263 else
1264 {
1265 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1266 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1267
1268 if (loop_vinfo)
1269 {
1270 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1271 basic_block new_bb;
1272 edge pe;
1273
1274 if (nested_in_vect_loop_p (loop, stmt))
1275 loop = loop->inner;
1276
1277 pe = loop_preheader_edge (loop);
1278 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1279 gcc_assert (!new_bb);
1280 }
1281 else
1282 {
1283 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1284 basic_block bb;
1285 gimple_stmt_iterator gsi_bb_start;
1286
1287 gcc_assert (bb_vinfo);
1288 bb = BB_VINFO_BB (bb_vinfo);
1289 gsi_bb_start = gsi_after_labels (bb);
1290 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1291 }
1292 }
1293
1294 if (dump_enabled_p ())
1295 {
1296 dump_printf_loc (MSG_NOTE, vect_location,
1297 "created new init_stmt: ");
1298 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1299 dump_printf (MSG_NOTE, "\n");
1300 }
1301 }
1302
1303 /* Function vect_init_vector.
1304
1305 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1306 TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1307 a vector type, a vector with all elements equal to VAL is created first.
1308 Place the initialization at GSI if it is not NULL.  Otherwise, place the
1309 initialization at the loop preheader.
1310 Return the DEF of INIT_STMT.
1311 It will be used in the vectorization of STMT. */
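/* For example, initializing a four-element integer vector type from the
   scalar constant 3 creates vect_cst_ = { 3, 3, 3, 3 } and returns that
   new vector def.  */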
1312
1313 tree
1314 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1315 {
1316 tree new_var;
1317 gimple init_stmt;
1318 tree vec_oprnd;
1319 tree new_temp;
1320
1321 if (TREE_CODE (type) == VECTOR_TYPE
1322 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1323 {
1324 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1325 {
1326 if (CONSTANT_CLASS_P (val))
1327 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1328 else
1329 {
1330 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1331 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1332 new_temp, val,
1333 NULL_TREE);
1334 vect_init_vector_1 (stmt, init_stmt, gsi);
1335 val = new_temp;
1336 }
1337 }
1338 val = build_vector_from_val (type, val);
1339 }
1340
1341 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1342 init_stmt = gimple_build_assign (new_var, val);
1343 new_temp = make_ssa_name (new_var, init_stmt);
1344 gimple_assign_set_lhs (init_stmt, new_temp);
1345 vect_init_vector_1 (stmt, init_stmt, gsi);
1346 vec_oprnd = gimple_assign_lhs (init_stmt);
1347 return vec_oprnd;
1348 }
1349
1350
1351 /* Function vect_get_vec_def_for_operand.
1352
1353 OP is an operand in STMT. This function returns a (vector) def that will be
1354 used in the vectorized stmt for STMT.
1355
1356 In the case that OP is an SSA_NAME which is defined in the loop, then
1357 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1358
1359 In case OP is an invariant or constant, a new stmt that creates a vector def
1360 needs to be introduced. */
1361
1362 tree
1363 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1364 {
1365 tree vec_oprnd;
1366 gimple vec_stmt;
1367 gimple def_stmt;
1368 stmt_vec_info def_stmt_info = NULL;
1369 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1370 unsigned int nunits;
1371 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1372 tree def;
1373 enum vect_def_type dt;
1374 bool is_simple_use;
1375 tree vector_type;
1376
1377 if (dump_enabled_p ())
1378 {
1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "vect_get_vec_def_for_operand: ");
1381 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1382 dump_printf (MSG_NOTE, "\n");
1383 }
1384
1385 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1386 &def_stmt, &def, &dt);
1387 gcc_assert (is_simple_use);
1388 if (dump_enabled_p ())
1389 {
1390 int loc_printed = 0;
1391 if (def)
1392 {
1393 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1394 loc_printed = 1;
1395 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1396 dump_printf (MSG_NOTE, "\n");
1397 }
1398 if (def_stmt)
1399 {
1400 if (loc_printed)
1401 dump_printf (MSG_NOTE, " def_stmt = ");
1402 else
1403 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1404 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1405 dump_printf (MSG_NOTE, "\n");
1406 }
1407 }
1408
1409 switch (dt)
1410 {
1411 /* Case 1: operand is a constant. */
1412 case vect_constant_def:
1413 {
1414 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1415 gcc_assert (vector_type);
1416 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1417
1418 if (scalar_def)
1419 *scalar_def = op;
1420
1421 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1422 if (dump_enabled_p ())
1423 dump_printf_loc (MSG_NOTE, vect_location,
1424 "Create vector_cst. nunits = %d\n", nunits);
1425
1426 return vect_init_vector (stmt, op, vector_type, NULL);
1427 }
1428
1429 /* Case 2: operand is defined outside the loop - loop invariant. */
1430 case vect_external_def:
1431 {
1432 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1433 gcc_assert (vector_type);
1434
1435 if (scalar_def)
1436 *scalar_def = def;
1437
1438 /* Create 'vec_inv = {inv,inv,..,inv}' */
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1441
1442 return vect_init_vector (stmt, def, vector_type, NULL);
1443 }
1444
1445 /* Case 3: operand is defined inside the loop. */
1446 case vect_internal_def:
1447 {
1448 if (scalar_def)
1449 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1450
1451 /* Get the def from the vectorized stmt. */
1452 def_stmt_info = vinfo_for_stmt (def_stmt);
1453
1454 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1455 /* Get vectorized pattern statement. */
1456 if (!vec_stmt
1457 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1458 && !STMT_VINFO_RELEVANT (def_stmt_info))
1459 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1460 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1461 gcc_assert (vec_stmt);
1462 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1463 vec_oprnd = PHI_RESULT (vec_stmt);
1464 else if (is_gimple_call (vec_stmt))
1465 vec_oprnd = gimple_call_lhs (vec_stmt);
1466 else
1467 vec_oprnd = gimple_assign_lhs (vec_stmt);
1468 return vec_oprnd;
1469 }
1470
1471 /* Case 4: operand is defined by a loop header phi - reduction */
1472 case vect_reduction_def:
1473 case vect_double_reduction_def:
1474 case vect_nested_cycle:
1475 {
1476 struct loop *loop;
1477
1478 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1479 loop = (gimple_bb (def_stmt))->loop_father;
1480
1481 /* Get the def before the loop */
1482 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1483 return get_initial_def_for_reduction (stmt, op, scalar_def);
1484 }
1485
1486 /* Case 5: operand is defined by loop-header phi - induction. */
1487 case vect_induction_def:
1488 {
1489 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1490
1491 /* Get the def from the vectorized stmt. */
1492 def_stmt_info = vinfo_for_stmt (def_stmt);
1493 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1494 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1495 vec_oprnd = PHI_RESULT (vec_stmt);
1496 else
1497 vec_oprnd = gimple_get_lhs (vec_stmt);
1498 return vec_oprnd;
1499 }
1500
1501 default:
1502 gcc_unreachable ();
1503 }
1504 }
1505
1506
1507 /* Function vect_get_vec_def_for_stmt_copy
1508
1509 Return a vector-def for an operand. This function is used when the
1510 vectorized stmt to be created (by the caller to this function) is a "copy"
1511 created in case the vectorized result cannot fit in one vector, and several
1512 copies of the vector-stmt are required. In this case the vector-def is
1513 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1514 of the stmt that defines VEC_OPRND.
1515 DT is the type of the vector def VEC_OPRND.
1516
1517 Context:
1518 In case the vectorization factor (VF) is bigger than the number
1519 of elements that can fit in a vectype (nunits), we have to generate
1520 more than one vector stmt to vectorize the scalar stmt. This situation
1521 arises when there are multiple data-types operated upon in the loop; the
1522 smallest data-type determines the VF, and as a result, when vectorizing
1523 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1524 vector stmt (each computing a vector of 'nunits' results, and together
1525 computing 'VF' results in each iteration). This function is called when
1526 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1527 which VF=16 and nunits=4, so the number of copies required is 4):
1528
1529 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1530
1531 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1532 VS1.1: vx.1 = memref1 VS1.2
1533 VS1.2: vx.2 = memref2 VS1.3
1534 VS1.3: vx.3 = memref3
1535
1536 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1537 VSnew.1: vz1 = vx.1 + ... VSnew.2
1538 VSnew.2: vz2 = vx.2 + ... VSnew.3
1539 VSnew.3: vz3 = vx.3 + ...
1540
1541 The vectorization of S1 is explained in vectorizable_load.
1542 The vectorization of S2:
1543 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1544 the function 'vect_get_vec_def_for_operand' is called to
1545 get the relevant vector-def for each operand of S2. For operand x it
1546 returns the vector-def 'vx.0'.
1547
1548 To create the remaining copies of the vector-stmt (VSnew.j), this
1549 function is called to get the relevant vector-def for each operand. It is
1550 obtained from the respective VS1.j stmt, which is recorded in the
1551 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1552
1553 For example, to obtain the vector-def 'vx.1' in order to create the
1554 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1555 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1556 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1557 and return its def ('vx.1').
1558 Overall, to create the above sequence this function will be called 3 times:
1559 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1560 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1561 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1562
1563 tree
1564 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1565 {
1566 gimple vec_stmt_for_operand;
1567 stmt_vec_info def_stmt_info;
1568
1569 /* Do nothing; can reuse same def. */
1570 if (dt == vect_external_def || dt == vect_constant_def )
1571 return vec_oprnd;
1572
1573 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1574 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1575 gcc_assert (def_stmt_info);
1576 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1577 gcc_assert (vec_stmt_for_operand);
1578 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1579 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1580 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1581 else
1582 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1583 return vec_oprnd;
1584 }
1585
1586
1587 /* Get vectorized definitions for the operands to create a copy of an original
1588 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1589
1590 static void
1591 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1592 vec<tree> *vec_oprnds0,
1593 vec<tree> *vec_oprnds1)
1594 {
1595 tree vec_oprnd = vec_oprnds0->pop ();
1596
1597 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1598 vec_oprnds0->quick_push (vec_oprnd);
1599
1600 if (vec_oprnds1 && vec_oprnds1->length ())
1601 {
1602 vec_oprnd = vec_oprnds1->pop ();
1603 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1604 vec_oprnds1->quick_push (vec_oprnd);
1605 }
1606 }
1607
1608
1609 /* Get vectorized definitions for OP0 and OP1.
1610 REDUC_INDEX is the index of reduction operand in case of reduction,
1611 and -1 otherwise. */
1612
1613 void
1614 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1615 vec<tree> *vec_oprnds0,
1616 vec<tree> *vec_oprnds1,
1617 slp_tree slp_node, int reduc_index)
1618 {
1619 if (slp_node)
1620 {
1621 int nops = (op1 == NULL_TREE) ? 1 : 2;
1622 auto_vec<tree> ops (nops);
1623 auto_vec<vec<tree> > vec_defs (nops);
1624
1625 ops.quick_push (op0);
1626 if (op1)
1627 ops.quick_push (op1);
1628
1629 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1630
1631 *vec_oprnds0 = vec_defs[0];
1632 if (op1)
1633 *vec_oprnds1 = vec_defs[1];
1634 }
1635 else
1636 {
1637 tree vec_oprnd;
1638
1639 vec_oprnds0->create (1);
1640 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1641 vec_oprnds0->quick_push (vec_oprnd);
1642
1643 if (op1)
1644 {
1645 vec_oprnds1->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1647 vec_oprnds1->quick_push (vec_oprnd);
1648 }
1649 }
1650 }
1651
1652
1653 /* Function vect_finish_stmt_generation.
1654
1655 Insert a new stmt. */
1656
1657 void
1658 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1659 gimple_stmt_iterator *gsi)
1660 {
1661 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1662 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1664
1665 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1666
1667 if (!gsi_end_p (*gsi)
1668 && gimple_has_mem_ops (vec_stmt))
1669 {
1670 gimple at_stmt = gsi_stmt (*gsi);
1671 tree vuse = gimple_vuse (at_stmt);
1672 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1673 {
1674 tree vdef = gimple_vdef (at_stmt);
1675 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1676 /* If we have an SSA vuse and insert a store, update virtual
1677 SSA form to avoid triggering the renamer. Do so only
1678 if we can easily see all uses - which is what almost always
1679 happens with the way vectorized stmts are inserted. */
1680 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1681 && ((is_gimple_assign (vec_stmt)
1682 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1683 || (is_gimple_call (vec_stmt)
1684 && !(gimple_call_flags (vec_stmt)
1685 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1686 {
1687 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1688 gimple_set_vdef (vec_stmt, new_vdef);
1689 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1690 }
1691 }
1692 }
1693 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1694
1695 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1696 bb_vinfo));
1697
1698 if (dump_enabled_p ())
1699 {
1700 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1701 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1702 dump_printf (MSG_NOTE, "\n");
1703 }
1704
1705 gimple_set_location (vec_stmt, gimple_location (stmt));
1706
1707 /* While EH edges will generally prevent vectorization, stmt might
1708 e.g. be in a must-not-throw region. Ensure newly created stmts
1709 that could throw are part of the same region. */
1710 int lp_nr = lookup_stmt_eh_lp (stmt);
1711 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1712 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1713 }
1714
1715 /* Checks if CALL can be vectorized with vector types VECTYPE_OUT and
1716 VECTYPE_IN.  Returns a function declaration if the target has a
1717 vectorized version of the function, or NULL_TREE otherwise.  */
1718
1719 tree
1720 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1721 {
1722 tree fndecl = gimple_call_fndecl (call);
1723
1724 /* We only handle functions that do not read or clobber memory -- i.e.
1725 const or novops ones. */
1726 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1727 return NULL_TREE;
1728
1729 if (!fndecl
1730 || TREE_CODE (fndecl) != FUNCTION_DECL
1731 || !DECL_BUILT_IN (fndecl))
1732 return NULL_TREE;
1733
1734 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1735 vectype_in);
1736 }
1737
1738
1739 static tree permute_vec_elements (tree, tree, tree, gimple,
1740 gimple_stmt_iterator *);
1741
1742
1743 /* Function vectorizable_mask_load_store.
1744
1745 Check if STMT performs a conditional load or store that can be vectorized.
1746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1747 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1748 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1749
1750 static bool
1751 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1752 gimple *vec_stmt, slp_tree slp_node)
1753 {
1754 tree vec_dest = NULL;
1755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1756 stmt_vec_info prev_stmt_info;
1757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1758 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1759 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1760 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1761 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1762 tree elem_type;
1763 gimple new_stmt;
1764 tree dummy;
1765 tree dataref_ptr = NULL_TREE;
1766 gimple ptr_incr;
1767 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1768 int ncopies;
1769 int i, j;
1770 bool inv_p;
1771 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1772 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1773 int gather_scale = 1;
1774 enum vect_def_type gather_dt = vect_unknown_def_type;
1775 bool is_store;
1776 tree mask;
1777 gimple def_stmt;
1778 tree def;
1779 enum vect_def_type dt;
1780
1781 if (slp_node != NULL)
1782 return false;
1783
1784 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1785 gcc_assert (ncopies >= 1);
1786
1787 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1788 mask = gimple_call_arg (stmt, 2);
1789 if (TYPE_PRECISION (TREE_TYPE (mask))
1790 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1791 return false;
1792
1793 /* FORNOW. This restriction should be relaxed. */
1794 if (nested_in_vect_loop && ncopies > 1)
1795 {
1796 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "multiple types in nested loop.");
1799 return false;
1800 }
1801
1802 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1803 return false;
1804
1805 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1806 return false;
1807
1808 if (!STMT_VINFO_DATA_REF (stmt_info))
1809 return false;
1810
1811 elem_type = TREE_TYPE (vectype);
1812
1813 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1814 return false;
1815
1816 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1817 return false;
1818
1819 if (STMT_VINFO_GATHER_P (stmt_info))
1820 {
1821 gimple def_stmt;
1822 tree def;
1823 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1824 &gather_off, &gather_scale);
1825 gcc_assert (gather_decl);
1826 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1827 &def_stmt, &def, &gather_dt,
1828 &gather_off_vectype))
1829 {
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "gather index use not simple.");
1833 return false;
1834 }
1835
1836 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1837 tree masktype
1838 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1839 if (TREE_CODE (masktype) == INTEGER_TYPE)
1840 {
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "masked gather with integer mask not supported.");
1844 return false;
1845 }
1846 }
1847 else if (tree_int_cst_compare (nested_in_vect_loop
1848 ? STMT_VINFO_DR_STEP (stmt_info)
1849 : DR_STEP (dr), size_zero_node) <= 0)
1850 return false;
1851 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1852 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1853 return false;
1854
1855 if (TREE_CODE (mask) != SSA_NAME)
1856 return false;
1857
1858 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1859 &def_stmt, &def, &dt))
1860 return false;
1861
1862 if (is_store)
1863 {
1864 tree rhs = gimple_call_arg (stmt, 3);
1865 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1866 &def_stmt, &def, &dt))
1867 return false;
1868 }
1869
1870 if (!vec_stmt) /* transformation not required. */
1871 {
1872 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1873 if (is_store)
1874 vect_model_store_cost (stmt_info, ncopies, false, dt,
1875 NULL, NULL, NULL);
1876 else
1877 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1878 return true;
1879 }
1880
1881 /** Transform. **/
1882
1883 if (STMT_VINFO_GATHER_P (stmt_info))
1884 {
1885 tree vec_oprnd0 = NULL_TREE, op;
1886 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1887 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1888 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1889 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1890 tree mask_perm_mask = NULL_TREE;
1891 edge pe = loop_preheader_edge (loop);
1892 gimple_seq seq;
1893 basic_block new_bb;
1894 enum { NARROW, NONE, WIDEN } modifier;
1895 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1896
1897 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1898 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1899 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1900 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1901 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1902 scaletype = TREE_VALUE (arglist);
1903 gcc_checking_assert (types_compatible_p (srctype, rettype)
1904 && types_compatible_p (srctype, masktype));
1905
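/* The gather offset vector need not have the same number of elements
as the data vector.  With twice as many offset elements (WIDEN) one
offset vector serves two copies, odd copies taking its high half via
PERM_MASK.  With half as many (NARROW) two gather calls are combined
per data vector, so NCOPIES is doubled and permutations are built to
merge the partial results and to split the mask.  */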
1906 if (nunits == gather_off_nunits)
1907 modifier = NONE;
1908 else if (nunits == gather_off_nunits / 2)
1909 {
1910 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1911 modifier = WIDEN;
1912
1913 for (i = 0; i < gather_off_nunits; ++i)
1914 sel[i] = i | nunits;
1915
1916 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1917 gcc_assert (perm_mask != NULL_TREE);
1918 }
1919 else if (nunits == gather_off_nunits * 2)
1920 {
1921 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1922 modifier = NARROW;
1923
1924 for (i = 0; i < nunits; ++i)
1925 sel[i] = i < gather_off_nunits
1926 ? i : i + nunits - gather_off_nunits;
1927
1928 perm_mask = vect_gen_perm_mask (vectype, sel);
1929 gcc_assert (perm_mask != NULL_TREE);
1930 ncopies *= 2;
1931 for (i = 0; i < nunits; ++i)
1932 sel[i] = i | gather_off_nunits;
1933 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1934 gcc_assert (mask_perm_mask != NULL_TREE);
1935 }
1936 else
1937 gcc_unreachable ();
1938
1939 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1940
1941 ptr = fold_convert (ptrtype, gather_base);
1942 if (!is_gimple_min_invariant (ptr))
1943 {
1944 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1945 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1946 gcc_assert (!new_bb);
1947 }
1948
1949 scale = build_int_cst (scaletype, gather_scale);
1950
1951 prev_stmt_info = NULL;
1952 for (j = 0; j < ncopies; ++j)
1953 {
1954 if (modifier == WIDEN && (j & 1))
1955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1956 perm_mask, stmt, gsi);
1957 else if (j == 0)
1958 op = vec_oprnd0
1959 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1960 else
1961 op = vec_oprnd0
1962 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1963
1964 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1965 {
1966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1967 == TYPE_VECTOR_SUBPARTS (idxtype));
1968 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1969 var = make_ssa_name (var, NULL);
1970 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1971 new_stmt
1972 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1973 op, NULL_TREE);
1974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1975 op = var;
1976 }
1977
1978 if (mask_perm_mask && (j & 1))
1979 mask_op = permute_vec_elements (mask_op, mask_op,
1980 mask_perm_mask, stmt, gsi);
1981 else
1982 {
1983 if (j == 0)
1984 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1985 else
1986 {
1987 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1988 &def_stmt, &def, &dt);
1989 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1990 }
1991
1992 mask_op = vec_mask;
1993 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1994 {
1995 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1996 == TYPE_VECTOR_SUBPARTS (masktype));
1997 var = vect_get_new_vect_var (masktype, vect_simple_var,
1998 NULL);
1999 var = make_ssa_name (var, NULL);
2000 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2001 new_stmt
2002 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2003 mask_op, NULL_TREE);
2004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2005 mask_op = var;
2006 }
2007 }
2008
2009 new_stmt
2010 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2011 scale);
2012
2013 if (!useless_type_conversion_p (vectype, rettype))
2014 {
2015 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2016 == TYPE_VECTOR_SUBPARTS (rettype));
2017 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2018 op = make_ssa_name (var, new_stmt);
2019 gimple_call_set_lhs (new_stmt, op);
2020 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2021 var = make_ssa_name (vec_dest, NULL);
2022 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2023 new_stmt
2024 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2025 NULL_TREE);
2026 }
2027 else
2028 {
2029 var = make_ssa_name (vec_dest, new_stmt);
2030 gimple_call_set_lhs (new_stmt, var);
2031 }
2032
2033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2034
2035 if (modifier == NARROW)
2036 {
2037 if ((j & 1) == 0)
2038 {
2039 prev_res = var;
2040 continue;
2041 }
2042 var = permute_vec_elements (prev_res, var,
2043 perm_mask, stmt, gsi);
2044 new_stmt = SSA_NAME_DEF_STMT (var);
2045 }
2046
2047 if (prev_stmt_info == NULL)
2048 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2049 else
2050 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2051 prev_stmt_info = vinfo_for_stmt (new_stmt);
2052 }
2053
2054 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2055 from the IL. */
2056 tree lhs = gimple_call_lhs (stmt);
2057 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2058 set_vinfo_for_stmt (new_stmt, stmt_info);
2059 set_vinfo_for_stmt (stmt, NULL);
2060 STMT_VINFO_STMT (stmt_info) = new_stmt;
2061 gsi_replace (gsi, new_stmt, true);
2062 return true;
2063 }
2064 else if (is_store)
2065 {
2066 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2067 prev_stmt_info = NULL;
2068 for (i = 0; i < ncopies; i++)
2069 {
2070 unsigned align, misalign;
2071
2072 if (i == 0)
2073 {
2074 tree rhs = gimple_call_arg (stmt, 3);
2075 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2076 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2077 /* We should have caught mismatched types earlier. */
2078 gcc_assert (useless_type_conversion_p (vectype,
2079 TREE_TYPE (vec_rhs)));
2080 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2081 NULL_TREE, &dummy, gsi,
2082 &ptr_incr, false, &inv_p);
2083 gcc_assert (!inv_p);
2084 }
2085 else
2086 {
2087 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2088 &def, &dt);
2089 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2090 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2091 &def, &dt);
2092 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2093 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2094 TYPE_SIZE_UNIT (vectype));
2095 }
2096
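/* Record pointer alignment for the masked store: full vector alignment
if the access is known to be aligned, only element alignment if the
misalignment is unknown, otherwise the known misalignment.  */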
2097 align = TYPE_ALIGN_UNIT (vectype);
2098 if (aligned_access_p (dr))
2099 misalign = 0;
2100 else if (DR_MISALIGNMENT (dr) == -1)
2101 {
2102 align = TYPE_ALIGN_UNIT (elem_type);
2103 misalign = 0;
2104 }
2105 else
2106 misalign = DR_MISALIGNMENT (dr);
2107 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2108 misalign);
2109 new_stmt
2110 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2111 gimple_call_arg (stmt, 1),
2112 vec_mask, vec_rhs);
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114 if (i == 0)
2115 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2116 else
2117 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2118 prev_stmt_info = vinfo_for_stmt (new_stmt);
2119 }
2120 }
2121 else
2122 {
2123 tree vec_mask = NULL_TREE;
2124 prev_stmt_info = NULL;
2125 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2126 for (i = 0; i < ncopies; i++)
2127 {
2128 unsigned align, misalign;
2129
2130 if (i == 0)
2131 {
2132 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2133 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2134 NULL_TREE, &dummy, gsi,
2135 &ptr_incr, false, &inv_p);
2136 gcc_assert (!inv_p);
2137 }
2138 else
2139 {
2140 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2141 &def, &dt);
2142 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2143 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2144 TYPE_SIZE_UNIT (vectype));
2145 }
2146
2147 align = TYPE_ALIGN_UNIT (vectype);
2148 if (aligned_access_p (dr))
2149 misalign = 0;
2150 else if (DR_MISALIGNMENT (dr) == -1)
2151 {
2152 align = TYPE_ALIGN_UNIT (elem_type);
2153 misalign = 0;
2154 }
2155 else
2156 misalign = DR_MISALIGNMENT (dr);
2157 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2158 misalign);
2159 new_stmt
2160 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2161 gimple_call_arg (stmt, 1),
2162 vec_mask);
2163 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2165 if (i == 0)
2166 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2167 else
2168 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2169 prev_stmt_info = vinfo_for_stmt (new_stmt);
2170 }
2171 }
2172
2173 if (!is_store)
2174 {
2175 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2176 from the IL. */
2177 tree lhs = gimple_call_lhs (stmt);
2178 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2179 set_vinfo_for_stmt (new_stmt, stmt_info);
2180 set_vinfo_for_stmt (stmt, NULL);
2181 STMT_VINFO_STMT (stmt_info) = new_stmt;
2182 gsi_replace (gsi, new_stmt, true);
2183 }
2184
2185 return true;
2186 }
2187
2188
2189 /* Function vectorizable_call.
2190
2191 Check if STMT performs a function call that can be vectorized.
2192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2195
2196 static bool
2197 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2198 slp_tree slp_node)
2199 {
2200 tree vec_dest;
2201 tree scalar_dest;
2202 tree op, type;
2203 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2205 tree vectype_out, vectype_in;
2206 int nunits_in;
2207 int nunits_out;
2208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2209 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2210 tree fndecl, new_temp, def, rhs_type;
2211 gimple def_stmt;
2212 enum vect_def_type dt[3]
2213 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2214 gimple new_stmt = NULL;
2215 int ncopies, j;
2216 vec<tree> vargs = vNULL;
2217 enum { NARROW, NONE, WIDEN } modifier;
2218 size_t i, nargs;
2219 tree lhs;
2220
2221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2222 return false;
2223
2224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2225 return false;
2226
2227 /* Is STMT a vectorizable call? */
2228 if (!is_gimple_call (stmt))
2229 return false;
2230
2231 if (gimple_call_internal_p (stmt)
2232 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2233 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2234 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2235 slp_node);
2236
2237 if (gimple_call_lhs (stmt) == NULL_TREE
2238 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2239 return false;
2240
2241 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2242
2243 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2244
2245 /* Process function arguments. */
2246 rhs_type = NULL_TREE;
2247 vectype_in = NULL_TREE;
2248 nargs = gimple_call_num_args (stmt);
2249
2250 /* Bail out if the function has more than three arguments; we do not have
2251 interesting builtin functions to vectorize with more than two arguments
2252 except for fma. Having no arguments is not good either. */
2253 if (nargs == 0 || nargs > 3)
2254 return false;
2255
2256 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2257 if (gimple_call_internal_p (stmt)
2258 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2259 {
2260 nargs = 0;
2261 rhs_type = unsigned_type_node;
2262 }
2263
2264 for (i = 0; i < nargs; i++)
2265 {
2266 tree opvectype;
2267
2268 op = gimple_call_arg (stmt, i);
2269
2270 /* We can only handle calls with arguments of the same type. */
2271 if (rhs_type
2272 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2273 {
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "argument types differ.\n");
2277 return false;
2278 }
2279 if (!rhs_type)
2280 rhs_type = TREE_TYPE (op);
2281
2282 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2283 &def_stmt, &def, &dt[i], &opvectype))
2284 {
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "use not simple.\n");
2288 return false;
2289 }
2290
2291 if (!vectype_in)
2292 vectype_in = opvectype;
2293 else if (opvectype
2294 && opvectype != vectype_in)
2295 {
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "argument vector types differ.\n");
2299 return false;
2300 }
2301 }
2302 /* If all arguments are external or constant defs, use a vector type with
2303 the same size as the output vector type. */
2304 if (!vectype_in)
2305 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2306 if (vec_stmt)
2307 gcc_assert (vectype_in);
2308 if (!vectype_in)
2309 {
2310 if (dump_enabled_p ())
2311 {
2312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2313 "no vectype for scalar type ");
2314 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2315 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2316 }
2317
2318 return false;
2319 }
2320
2321 /* FORNOW */
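/* Classify the call by the ratio of input to output vector lanes:
NARROW means each vectorized call consumes two input vectors per
output vector; the WIDEN case is rejected below as unimplemented.  */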
2322 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2323 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2324 if (nunits_in == nunits_out / 2)
2325 modifier = NARROW;
2326 else if (nunits_out == nunits_in)
2327 modifier = NONE;
2328 else if (nunits_out == nunits_in / 2)
2329 modifier = WIDEN;
2330 else
2331 return false;
2332
2333 /* For now, we only vectorize functions if a target specific builtin
2334 is available. TODO -- in some cases, it might be profitable to
2335 insert the calls for pieces of the vector, in order to be able
2336 to vectorize other operations in the loop. */
2337 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2338 if (fndecl == NULL_TREE)
2339 {
2340 if (gimple_call_internal_p (stmt)
2341 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2342 && !slp_node
2343 && loop_vinfo
2344 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2345 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2346 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2347 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2348 {
2349 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2350 { 0, 1, 2, ... vf - 1 } vector. */
2351 gcc_assert (nargs == 0);
2352 }
2353 else
2354 {
2355 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2357 "function is not vectorizable.\n");
2358 return false;
2359 }
2360 }
2361
2362 gcc_assert (!gimple_vuse (stmt));
2363
2364 if (slp_node || PURE_SLP_STMT (stmt_info))
2365 ncopies = 1;
2366 else if (modifier == NARROW)
2367 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2368 else
2369 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2370
2371 /* Sanity check: make sure that at least one copy of the vectorized stmt
2372 needs to be generated. */
2373 gcc_assert (ncopies >= 1);
2374
2375 if (!vec_stmt) /* transformation not required. */
2376 {
2377 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2380 "\n");
2381 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2382 return true;
2383 }
2384
2385 /** Transform. **/
2386
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2389
2390 /* Handle def. */
2391 scalar_dest = gimple_call_lhs (stmt);
2392 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2393
2394 prev_stmt_info = NULL;
2395 switch (modifier)
2396 {
2397 case NONE:
2398 for (j = 0; j < ncopies; ++j)
2399 {
2400 /* Build argument list for the vectorized call. */
2401 if (j == 0)
2402 vargs.create (nargs);
2403 else
2404 vargs.truncate (0);
2405
2406 if (slp_node)
2407 {
2408 auto_vec<vec<tree> > vec_defs (nargs);
2409 vec<tree> vec_oprnds0;
2410
2411 for (i = 0; i < nargs; i++)
2412 vargs.quick_push (gimple_call_arg (stmt, i));
2413 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2414 vec_oprnds0 = vec_defs[0];
2415
2416 /* Arguments are ready. Create the new vector stmt. */
2417 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2418 {
2419 size_t k;
2420 for (k = 0; k < nargs; k++)
2421 {
2422 vec<tree> vec_oprndsk = vec_defs[k];
2423 vargs[k] = vec_oprndsk[i];
2424 }
2425 new_stmt = gimple_build_call_vec (fndecl, vargs);
2426 new_temp = make_ssa_name (vec_dest, new_stmt);
2427 gimple_call_set_lhs (new_stmt, new_temp);
2428 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2429 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2430 }
2431
2432 for (i = 0; i < nargs; i++)
2433 {
2434 vec<tree> vec_oprndsi = vec_defs[i];
2435 vec_oprndsi.release ();
2436 }
2437 continue;
2438 }
2439
2440 for (i = 0; i < nargs; i++)
2441 {
2442 op = gimple_call_arg (stmt, i);
2443 if (j == 0)
2444 vec_oprnd0
2445 = vect_get_vec_def_for_operand (op, stmt, NULL);
2446 else
2447 {
2448 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2449 vec_oprnd0
2450 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2451 }
2452
2453 vargs.quick_push (vec_oprnd0);
2454 }
2455
2456 if (gimple_call_internal_p (stmt)
2457 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2458 {
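/* For IFN_GOMP_SIMD_LANE build the constant lane-index vector for this
copy: { j * nunits_out, ..., j * nunits_out + nunits_out - 1 }.  */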
2459 tree *v = XALLOCAVEC (tree, nunits_out);
2460 int k;
2461 for (k = 0; k < nunits_out; ++k)
2462 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2463 tree cst = build_vector (vectype_out, v);
2464 tree new_var
2465 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2466 gimple init_stmt = gimple_build_assign (new_var, cst);
2467 new_temp = make_ssa_name (new_var, init_stmt);
2468 gimple_assign_set_lhs (init_stmt, new_temp);
2469 vect_init_vector_1 (stmt, init_stmt, NULL);
2470 new_temp = make_ssa_name (vec_dest, NULL);
2471 new_stmt = gimple_build_assign (new_temp,
2472 gimple_assign_lhs (init_stmt));
2473 }
2474 else
2475 {
2476 new_stmt = gimple_build_call_vec (fndecl, vargs);
2477 new_temp = make_ssa_name (vec_dest, new_stmt);
2478 gimple_call_set_lhs (new_stmt, new_temp);
2479 }
2480 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2481
2482 if (j == 0)
2483 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2484 else
2485 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2486
2487 prev_stmt_info = vinfo_for_stmt (new_stmt);
2488 }
2489
2490 break;
2491
2492 case NARROW:
2493 for (j = 0; j < ncopies; ++j)
2494 {
2495 /* Build argument list for the vectorized call. */
2496 if (j == 0)
2497 vargs.create (nargs * 2);
2498 else
2499 vargs.truncate (0);
2500
2501 if (slp_node)
2502 {
2503 auto_vec<vec<tree> > vec_defs (nargs);
2504 vec<tree> vec_oprnds0;
2505
2506 for (i = 0; i < nargs; i++)
2507 vargs.quick_push (gimple_call_arg (stmt, i));
2508 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2509 vec_oprnds0 = vec_defs[0];
2510
2511 /* Arguments are ready. Create the new vector stmt. */
2512 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2513 {
2514 size_t k;
2515 vargs.truncate (0);
2516 for (k = 0; k < nargs; k++)
2517 {
2518 vec<tree> vec_oprndsk = vec_defs[k];
2519 vargs.quick_push (vec_oprndsk[i]);
2520 vargs.quick_push (vec_oprndsk[i + 1]);
2521 }
2522 new_stmt = gimple_build_call_vec (fndecl, vargs);
2523 new_temp = make_ssa_name (vec_dest, new_stmt);
2524 gimple_call_set_lhs (new_stmt, new_temp);
2525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2526 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2527 }
2528
2529 for (i = 0; i < nargs; i++)
2530 {
2531 vec<tree> vec_oprndsi = vec_defs[i];
2532 vec_oprndsi.release ();
2533 }
2534 continue;
2535 }
2536
2537 for (i = 0; i < nargs; i++)
2538 {
2539 op = gimple_call_arg (stmt, i);
2540 if (j == 0)
2541 {
2542 vec_oprnd0
2543 = vect_get_vec_def_for_operand (op, stmt, NULL);
2544 vec_oprnd1
2545 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2546 }
2547 else
2548 {
2549 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2550 vec_oprnd0
2551 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2552 vec_oprnd1
2553 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2554 }
2555
2556 vargs.quick_push (vec_oprnd0);
2557 vargs.quick_push (vec_oprnd1);
2558 }
2559
2560 new_stmt = gimple_build_call_vec (fndecl, vargs);
2561 new_temp = make_ssa_name (vec_dest, new_stmt);
2562 gimple_call_set_lhs (new_stmt, new_temp);
2563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2564
2565 if (j == 0)
2566 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2567 else
2568 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2569
2570 prev_stmt_info = vinfo_for_stmt (new_stmt);
2571 }
2572
2573 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2574
2575 break;
2576
2577 case WIDEN:
2578 /* No current target implements this case. */
2579 return false;
2580 }
2581
2582 vargs.release ();
2583
2584 /* The call in STMT might prevent it from being removed by DCE.
2585 We however cannot remove it here, due to the way the SSA name
2586 it defines is mapped to the new definition. So just replace the
2587 rhs of the statement with something harmless. */
2588
2589 if (slp_node)
2590 return true;
2591
2592 type = TREE_TYPE (scalar_dest);
2593 if (is_pattern_stmt_p (stmt_info))
2594 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2595 else
2596 lhs = gimple_call_lhs (stmt);
2597 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2598 set_vinfo_for_stmt (new_stmt, stmt_info);
2599 set_vinfo_for_stmt (stmt, NULL);
2600 STMT_VINFO_STMT (stmt_info) = new_stmt;
2601 gsi_replace (gsi, new_stmt, false);
2602
2603 return true;
2604 }
2605
2606
2607 struct simd_call_arg_info
2608 {
2609 tree vectype;
2610 tree op;
2611 enum vect_def_type dt;
2612 HOST_WIDE_INT linear_step;
2613 unsigned int align;
2614 };
2615
2616 /* Function vectorizable_simd_clone_call.
2617
2618 Check if STMT performs a function call that can be vectorized
2619 by calling a simd clone of the function.
2620 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2621 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2622 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2623
2624 static bool
2625 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2626 gimple *vec_stmt, slp_tree slp_node)
2627 {
2628 tree vec_dest;
2629 tree scalar_dest;
2630 tree op, type;
2631 tree vec_oprnd0 = NULL_TREE;
2632 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2633 tree vectype;
2634 unsigned int nunits;
2635 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2636 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2637 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2638 tree fndecl, new_temp, def;
2639 gimple def_stmt;
2640 gimple new_stmt = NULL;
2641 int ncopies, j;
2642 vec<simd_call_arg_info> arginfo = vNULL;
2643 vec<tree> vargs = vNULL;
2644 size_t i, nargs;
2645 tree lhs, rtype, ratype;
2646 vec<constructor_elt, va_gc> *ret_ctor_elts;
2647
2648 /* Is STMT a vectorizable call? */
2649 if (!is_gimple_call (stmt))
2650 return false;
2651
2652 fndecl = gimple_call_fndecl (stmt);
2653 if (fndecl == NULL_TREE)
2654 return false;
2655
2656 struct cgraph_node *node = cgraph_node::get (fndecl);
2657 if (node == NULL || node->simd_clones == NULL)
2658 return false;
2659
2660 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2661 return false;
2662
2663 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2664 return false;
2665
2666 if (gimple_call_lhs (stmt)
2667 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2668 return false;
2669
2670 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2671
2672 vectype = STMT_VINFO_VECTYPE (stmt_info);
2673
2674 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2675 return false;
2676
2677 /* FORNOW */
2678 if (slp_node || PURE_SLP_STMT (stmt_info))
2679 return false;
2680
2681 /* Process function arguments. */
2682 nargs = gimple_call_num_args (stmt);
2683
2684 /* Bail out if the function has zero arguments. */
2685 if (nargs == 0)
2686 return false;
2687
2688 arginfo.create (nargs);
2689
2690 for (i = 0; i < nargs; i++)
2691 {
2692 simd_call_arg_info thisarginfo;
2693 affine_iv iv;
2694
2695 thisarginfo.linear_step = 0;
2696 thisarginfo.align = 0;
2697 thisarginfo.op = NULL_TREE;
2698
2699 op = gimple_call_arg (stmt, i);
2700 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2701 &def_stmt, &def, &thisarginfo.dt,
2702 &thisarginfo.vectype)
2703 || thisarginfo.dt == vect_uninitialized_def)
2704 {
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2707 "use not simple.\n");
2708 arginfo.release ();
2709 return false;
2710 }
2711
2712 if (thisarginfo.dt == vect_constant_def
2713 || thisarginfo.dt == vect_external_def)
2714 gcc_assert (thisarginfo.vectype == NULL_TREE);
2715 else
2716 gcc_assert (thisarginfo.vectype != NULL_TREE);
2717
2718 if (thisarginfo.dt != vect_constant_def
2719 && thisarginfo.dt != vect_external_def
2720 && loop_vinfo
2721 && TREE_CODE (op) == SSA_NAME
2722 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2723 && tree_fits_shwi_p (iv.step))
2724 {
2725 thisarginfo.linear_step = tree_to_shwi (iv.step);
2726 thisarginfo.op = iv.base;
2727 }
2728 else if ((thisarginfo.dt == vect_constant_def
2729 || thisarginfo.dt == vect_external_def)
2730 && POINTER_TYPE_P (TREE_TYPE (op)))
2731 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2732
2733 arginfo.quick_push (thisarginfo);
2734 }
2735
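/* Choose the simd clone to call: unless one was already recorded for
this statement, walk all simd clones of the function and keep the
candidate with the lowest accumulated badness score; clones whose
simdlen exceeds the vectorization factor, whose arguments do not
match, or which are inbranch (for now) are skipped.  */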
2736 unsigned int badness = 0;
2737 struct cgraph_node *bestn = NULL;
2738 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2739 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2740 else
2741 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2742 n = n->simdclone->next_clone)
2743 {
2744 unsigned int this_badness = 0;
2745 if (n->simdclone->simdlen
2746 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2747 || n->simdclone->nargs != nargs)
2748 continue;
2749 if (n->simdclone->simdlen
2750 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2751 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2752 - exact_log2 (n->simdclone->simdlen)) * 1024;
2753 if (n->simdclone->inbranch)
2754 this_badness += 2048;
2755 int target_badness = targetm.simd_clone.usable (n);
2756 if (target_badness < 0)
2757 continue;
2758 this_badness += target_badness * 512;
2759 /* FORNOW: Have to add code to add the mask argument. */
2760 if (n->simdclone->inbranch)
2761 continue;
2762 for (i = 0; i < nargs; i++)
2763 {
2764 switch (n->simdclone->args[i].arg_type)
2765 {
2766 case SIMD_CLONE_ARG_TYPE_VECTOR:
2767 if (!useless_type_conversion_p
2768 (n->simdclone->args[i].orig_type,
2769 TREE_TYPE (gimple_call_arg (stmt, i))))
2770 i = -1;
2771 else if (arginfo[i].dt == vect_constant_def
2772 || arginfo[i].dt == vect_external_def
2773 || arginfo[i].linear_step)
2774 this_badness += 64;
2775 break;
2776 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2777 if (arginfo[i].dt != vect_constant_def
2778 && arginfo[i].dt != vect_external_def)
2779 i = -1;
2780 break;
2781 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2782 if (arginfo[i].dt == vect_constant_def
2783 || arginfo[i].dt == vect_external_def
2784 || (arginfo[i].linear_step
2785 != n->simdclone->args[i].linear_step))
2786 i = -1;
2787 break;
2788 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2789 /* FORNOW */
2790 i = -1;
2791 break;
2792 case SIMD_CLONE_ARG_TYPE_MASK:
2793 gcc_unreachable ();
2794 }
2795 if (i == (size_t) -1)
2796 break;
2797 if (n->simdclone->args[i].alignment > arginfo[i].align)
2798 {
2799 i = -1;
2800 break;
2801 }
2802 if (arginfo[i].align)
2803 this_badness += (exact_log2 (arginfo[i].align)
2804 - exact_log2 (n->simdclone->args[i].alignment));
2805 }
2806 if (i == (size_t) -1)
2807 continue;
2808 if (bestn == NULL || this_badness < badness)
2809 {
2810 bestn = n;
2811 badness = this_badness;
2812 }
2813 }
2814
2815 if (bestn == NULL)
2816 {
2817 arginfo.release ();
2818 return false;
2819 }
2820
2821 for (i = 0; i < nargs; i++)
2822 if ((arginfo[i].dt == vect_constant_def
2823 || arginfo[i].dt == vect_external_def)
2824 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2825 {
2826 arginfo[i].vectype
2827 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2828 i)));
2829 if (arginfo[i].vectype == NULL
2830 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2831 > bestn->simdclone->simdlen))
2832 {
2833 arginfo.release ();
2834 return false;
2835 }
2836 }
2837
2838 fndecl = bestn->decl;
2839 nunits = bestn->simdclone->simdlen;
2840 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2841
2842 /* If the function isn't const, only allow it in simd loops where the
2843 user has asserted that at least nunits consecutive iterations can be
2844 performed using SIMD instructions. */
2845 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2846 && gimple_vuse (stmt))
2847 {
2848 arginfo.release ();
2849 return false;
2850 }
2851
2852 /* Sanity check: make sure that at least one copy of the vectorized stmt
2853 needs to be generated. */
2854 gcc_assert (ncopies >= 1);
2855
2856 if (!vec_stmt) /* transformation not required. */
2857 {
2858 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2859 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2860 if (dump_enabled_p ())
2861 dump_printf_loc (MSG_NOTE, vect_location,
2862 "=== vectorizable_simd_clone_call ===\n");
2863 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2864 arginfo.release ();
2865 return true;
2866 }
2867
2868 /** Transform. **/
2869
2870 if (dump_enabled_p ())
2871 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2872
2873 /* Handle def. */
2874 scalar_dest = gimple_call_lhs (stmt);
2875 vec_dest = NULL_TREE;
2876 rtype = NULL_TREE;
2877 ratype = NULL_TREE;
2878 if (scalar_dest)
2879 {
2880 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2881 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2882 if (TREE_CODE (rtype) == ARRAY_TYPE)
2883 {
2884 ratype = rtype;
2885 rtype = TREE_TYPE (ratype);
2886 }
2887 }
2888
2889 prev_stmt_info = NULL;
2890 for (j = 0; j < ncopies; ++j)
2891 {
2892 /* Build argument list for the vectorized call. */
2893 if (j == 0)
2894 vargs.create (nargs);
2895 else
2896 vargs.truncate (0);
2897
2898 for (i = 0; i < nargs; i++)
2899 {
2900 unsigned int k, l, m, o;
2901 tree atype;
2902 op = gimple_call_arg (stmt, i);
2903 switch (bestn->simdclone->args[i].arg_type)
2904 {
2905 case SIMD_CLONE_ARG_TYPE_VECTOR:
2906 atype = bestn->simdclone->args[i].vector_type;
2907 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2908 for (m = j * o; m < (j + 1) * o; m++)
2909 {
2910 if (TYPE_VECTOR_SUBPARTS (atype)
2911 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2912 {
2913 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2914 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2915 / TYPE_VECTOR_SUBPARTS (atype));
2916 gcc_assert ((k & (k - 1)) == 0);
2917 if (m == 0)
2918 vec_oprnd0
2919 = vect_get_vec_def_for_operand (op, stmt, NULL);
2920 else
2921 {
2922 vec_oprnd0 = arginfo[i].op;
2923 if ((m & (k - 1)) == 0)
2924 vec_oprnd0
2925 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2926 vec_oprnd0);
2927 }
2928 arginfo[i].op = vec_oprnd0;
2929 vec_oprnd0
2930 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2931 size_int (prec),
2932 bitsize_int ((m & (k - 1)) * prec));
2933 new_stmt
2934 = gimple_build_assign (make_ssa_name (atype, NULL),
2935 vec_oprnd0);
2936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2937 vargs.safe_push (gimple_assign_lhs (new_stmt));
2938 }
2939 else
2940 {
2941 k = (TYPE_VECTOR_SUBPARTS (atype)
2942 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2943 gcc_assert ((k & (k - 1)) == 0);
2944 vec<constructor_elt, va_gc> *ctor_elts;
2945 if (k != 1)
2946 vec_alloc (ctor_elts, k);
2947 else
2948 ctor_elts = NULL;
2949 for (l = 0; l < k; l++)
2950 {
2951 if (m == 0 && l == 0)
2952 vec_oprnd0
2953 = vect_get_vec_def_for_operand (op, stmt, NULL);
2954 else
2955 vec_oprnd0
2956 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2957 arginfo[i].op);
2958 arginfo[i].op = vec_oprnd0;
2959 if (k == 1)
2960 break;
2961 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2962 vec_oprnd0);
2963 }
2964 if (k == 1)
2965 vargs.safe_push (vec_oprnd0);
2966 else
2967 {
2968 vec_oprnd0 = build_constructor (atype, ctor_elts);
2969 new_stmt
2970 = gimple_build_assign (make_ssa_name (atype, NULL),
2971 vec_oprnd0);
2972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2973 vargs.safe_push (gimple_assign_lhs (new_stmt));
2974 }
2975 }
2976 }
2977 break;
2978 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2979 vargs.safe_push (op);
2980 break;
2981 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
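/* A linear argument is passed as a scalar that advances by a constant
step.  For the first copy create a PHI in the loop header that steps
by linear_step * ncopies * nunits per vector iteration; later copies
just add linear_step * j * nunits to that value.  */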
2982 if (j == 0)
2983 {
2984 gimple_seq stmts;
2985 arginfo[i].op
2986 = force_gimple_operand (arginfo[i].op, &stmts, true,
2987 NULL_TREE);
2988 if (stmts != NULL)
2989 {
2990 basic_block new_bb;
2991 edge pe = loop_preheader_edge (loop);
2992 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2993 gcc_assert (!new_bb);
2994 }
2995 tree phi_res = copy_ssa_name (op, NULL);
2996 gimple new_phi = create_phi_node (phi_res, loop->header);
2997 set_vinfo_for_stmt (new_phi,
2998 new_stmt_vec_info (new_phi, loop_vinfo,
2999 NULL));
3000 add_phi_arg (new_phi, arginfo[i].op,
3001 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3002 enum tree_code code
3003 = POINTER_TYPE_P (TREE_TYPE (op))
3004 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3005 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3006 ? sizetype : TREE_TYPE (op);
3007 widest_int cst
3008 = wi::mul (bestn->simdclone->args[i].linear_step,
3009 ncopies * nunits);
3010 tree tcst = wide_int_to_tree (type, cst);
3011 tree phi_arg = copy_ssa_name (op, NULL);
3012 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3013 phi_res, tcst);
3014 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3015 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3016 set_vinfo_for_stmt (new_stmt,
3017 new_stmt_vec_info (new_stmt, loop_vinfo,
3018 NULL));
3019 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3020 UNKNOWN_LOCATION);
3021 arginfo[i].op = phi_res;
3022 vargs.safe_push (phi_res);
3023 }
3024 else
3025 {
3026 enum tree_code code
3027 = POINTER_TYPE_P (TREE_TYPE (op))
3028 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3029 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3030 ? sizetype : TREE_TYPE (op);
3031 widest_int cst
3032 = wi::mul (bestn->simdclone->args[i].linear_step,
3033 j * nunits);
3034 tree tcst = wide_int_to_tree (type, cst);
3035 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3036 new_stmt
3037 = gimple_build_assign_with_ops (code, new_temp,
3038 arginfo[i].op, tcst);
3039 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3040 vargs.safe_push (new_temp);
3041 }
3042 break;
3043 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3044 default:
3045 gcc_unreachable ();
3046 }
3047 }
3048
3049 new_stmt = gimple_build_call_vec (fndecl, vargs);
3050 if (vec_dest)
3051 {
3052 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3053 if (ratype)
3054 new_temp = create_tmp_var (ratype, NULL);
3055 else if (TYPE_VECTOR_SUBPARTS (vectype)
3056 == TYPE_VECTOR_SUBPARTS (rtype))
3057 new_temp = make_ssa_name (vec_dest, new_stmt);
3058 else
3059 new_temp = make_ssa_name (rtype, new_stmt);
3060 gimple_call_set_lhs (new_stmt, new_temp);
3061 }
3062 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3063
3064 if (vec_dest)
3065 {
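/* The clone's return vector may not match VECTYPE.  If it is wider
(simdlen exceeds the lanes of VECTYPE) split the returned value into
several statement vectors; if it is narrower, accumulate the results
of several clone calls into a CONSTRUCTOR; an array return (RATYPE)
is read back via MEM_REF / ARRAY_REF and then clobbered.  */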
3066 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3067 {
3068 unsigned int k, l;
3069 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3070 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3071 gcc_assert ((k & (k - 1)) == 0);
3072 for (l = 0; l < k; l++)
3073 {
3074 tree t;
3075 if (ratype)
3076 {
3077 t = build_fold_addr_expr (new_temp);
3078 t = build2 (MEM_REF, vectype, t,
3079 build_int_cst (TREE_TYPE (t),
3080 l * prec / BITS_PER_UNIT));
3081 }
3082 else
3083 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3084 size_int (prec), bitsize_int (l * prec));
3085 new_stmt
3086 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3088 if (j == 0 && l == 0)
3089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3090 else
3091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3092
3093 prev_stmt_info = vinfo_for_stmt (new_stmt);
3094 }
3095
3096 if (ratype)
3097 {
3098 tree clobber = build_constructor (ratype, NULL);
3099 TREE_THIS_VOLATILE (clobber) = 1;
3100 new_stmt = gimple_build_assign (new_temp, clobber);
3101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3102 }
3103 continue;
3104 }
3105 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3106 {
3107 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3108 / TYPE_VECTOR_SUBPARTS (rtype));
3109 gcc_assert ((k & (k - 1)) == 0);
3110 if ((j & (k - 1)) == 0)
3111 vec_alloc (ret_ctor_elts, k);
3112 if (ratype)
3113 {
3114 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3115 for (m = 0; m < o; m++)
3116 {
3117 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3118 size_int (m), NULL_TREE, NULL_TREE);
3119 new_stmt
3120 = gimple_build_assign (make_ssa_name (rtype, NULL),
3121 tem);
3122 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3123 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3124 gimple_assign_lhs (new_stmt));
3125 }
3126 tree clobber = build_constructor (ratype, NULL);
3127 TREE_THIS_VOLATILE (clobber) = 1;
3128 new_stmt = gimple_build_assign (new_temp, clobber);
3129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3130 }
3131 else
3132 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3133 if ((j & (k - 1)) != k - 1)
3134 continue;
3135 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3136 new_stmt
3137 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3138 vec_oprnd0);
3139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3140
3141 if ((unsigned) j == k - 1)
3142 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3143 else
3144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3145
3146 prev_stmt_info = vinfo_for_stmt (new_stmt);
3147 continue;
3148 }
3149 else if (ratype)
3150 {
3151 tree t = build_fold_addr_expr (new_temp);
3152 t = build2 (MEM_REF, vectype, t,
3153 build_int_cst (TREE_TYPE (t), 0));
3154 new_stmt
3155 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3157 tree clobber = build_constructor (ratype, NULL);
3158 TREE_THIS_VOLATILE (clobber) = 1;
3159 vect_finish_stmt_generation (stmt,
3160 gimple_build_assign (new_temp,
3161 clobber), gsi);
3162 }
3163 }
3164
3165 if (j == 0)
3166 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3167 else
3168 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3169
3170 prev_stmt_info = vinfo_for_stmt (new_stmt);
3171 }
3172
3173 vargs.release ();
3174
3175 /* The call in STMT might prevent it from being removed by DCE.
3176 We however cannot remove it here, due to the way the SSA name
3177 it defines is mapped to the new definition. So just replace the
3178 rhs of the statement with something harmless. */
3179
3180 if (slp_node)
3181 return true;
3182
3183 if (scalar_dest)
3184 {
3185 type = TREE_TYPE (scalar_dest);
3186 if (is_pattern_stmt_p (stmt_info))
3187 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3188 else
3189 lhs = gimple_call_lhs (stmt);
3190 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3191 }
3192 else
3193 new_stmt = gimple_build_nop ();
3194 set_vinfo_for_stmt (new_stmt, stmt_info);
3195 set_vinfo_for_stmt (stmt, NULL);
3196 STMT_VINFO_STMT (stmt_info) = new_stmt;
3197 gsi_replace (gsi, new_stmt, false);
3198 unlink_stmt_vdef (stmt);
3199
3200 return true;
3201 }
3202
3203
3204 /* Function vect_gen_widened_results_half
3205
3206 Create a vector stmt whose code, number of arguments, and result
3207 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3208 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3209 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3210 needs to be created (DECL is a function-decl of a target-builtin).
3211 STMT is the original scalar stmt that we are vectorizing. */
3212
3213 static gimple
3214 vect_gen_widened_results_half (enum tree_code code,
3215 tree decl,
3216 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3217 tree vec_dest, gimple_stmt_iterator *gsi,
3218 gimple stmt)
3219 {
3220 gimple new_stmt;
3221 tree new_temp;
3222
3223 /* Generate half of the widened result: */
3224 if (code == CALL_EXPR)
3225 {
3226 /* Target specific support */
3227 if (op_type == binary_op)
3228 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3229 else
3230 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3231 new_temp = make_ssa_name (vec_dest, new_stmt);
3232 gimple_call_set_lhs (new_stmt, new_temp);
3233 }
3234 else
3235 {
3236 /* Generic support */
3237 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3238 if (op_type != binary_op)
3239 vec_oprnd1 = NULL;
3240 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3241 vec_oprnd1);
3242 new_temp = make_ssa_name (vec_dest, new_stmt);
3243 gimple_assign_set_lhs (new_stmt, new_temp);
3244 }
3245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3246
3247 return new_stmt;
3248 }
3249
3250
3251 /* Get vectorized definitions for loop-based vectorization. For the first
3252 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3253 the scalar operand), and for the rest we get a copy with
3254 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3255 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3256 The vectors are collected into VEC_OPRNDS. */
3257
3258 static void
3259 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3260 vec<tree> *vec_oprnds, int multi_step_cvt)
3261 {
3262 tree vec_oprnd;
3263
3264 /* Get first vector operand. */
3265 /* All the vector operands except the very first one (that is the scalar
3266 oprnd) are stmt copies. */
3267 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3268 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3269 else
3270 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3271
3272 vec_oprnds->quick_push (vec_oprnd);
3273
3274 /* Get second vector operand. */
3275 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3276 vec_oprnds->quick_push (vec_oprnd);
3277
3278 *oprnd = vec_oprnd;
3279
3280 /* For conversion in multiple steps, continue to get operands
3281 recursively. */
3282 if (multi_step_cvt)
3283 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3284 }
3285
3286
3287 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3288 For multi-step conversions store the resulting vectors and call the function
3289 recursively. */
3290
3291 static void
3292 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3293 int multi_step_cvt, gimple stmt,
3294 vec<tree> vec_dsts,
3295 gimple_stmt_iterator *gsi,
3296 slp_tree slp_node, enum tree_code code,
3297 stmt_vec_info *prev_stmt_info)
3298 {
3299 unsigned int i;
3300 tree vop0, vop1, new_tmp, vec_dest;
3301 gimple new_stmt;
3302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3303
3304 vec_dest = vec_dsts.pop ();
3305
3306 for (i = 0; i < vec_oprnds->length (); i += 2)
3307 {
3308 /* Create demotion operation. */
3309 vop0 = (*vec_oprnds)[i];
3310 vop1 = (*vec_oprnds)[i + 1];
3311 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3312 new_tmp = make_ssa_name (vec_dest, new_stmt);
3313 gimple_assign_set_lhs (new_stmt, new_tmp);
3314 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3315
3316 if (multi_step_cvt)
3317 /* Store the resulting vector for next recursive call. */
3318 (*vec_oprnds)[i/2] = new_tmp;
3319 else
3320 {
3321 /* This is the last step of the conversion sequence. Store the
3322 vectors in SLP_NODE or in vector info of the scalar statement
3323 (or in STMT_VINFO_RELATED_STMT chain). */
3324 if (slp_node)
3325 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3326 else
3327 {
3328 if (!*prev_stmt_info)
3329 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3330 else
3331 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3332
3333 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3334 }
3335 }
3336 }
3337
3338 /* For multi-step demotion operations we first generate demotion operations
3339 from the source type to the intermediate types, and then combine the
3340 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3341 type. */
3342 if (multi_step_cvt)
3343 {
3344 /* At each level of recursion we have half of the operands we had at the
3345 previous level. */
3346 vec_oprnds->truncate ((i+1)/2);
3347 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3348 stmt, vec_dsts, gsi, slp_node,
3349 VEC_PACK_TRUNC_EXPR,
3350 prev_stmt_info);
3351 }
3352
3353 vec_dsts.quick_push (vec_dest);
3354 }
3355
3356
3357 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3358 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3359 the resulting vectors and call the function recursively. */
3360
3361 static void
3362 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3363 vec<tree> *vec_oprnds1,
3364 gimple stmt, tree vec_dest,
3365 gimple_stmt_iterator *gsi,
3366 enum tree_code code1,
3367 enum tree_code code2, tree decl1,
3368 tree decl2, int op_type)
3369 {
3370 int i;
3371 tree vop0, vop1, new_tmp1, new_tmp2;
3372 gimple new_stmt1, new_stmt2;
3373 vec<tree> vec_tmp = vNULL;
3374
3375 vec_tmp.create (vec_oprnds0->length () * 2);
3376 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3377 {
3378 if (op_type == binary_op)
3379 vop1 = (*vec_oprnds1)[i];
3380 else
3381 vop1 = NULL_TREE;
3382
3383 /* Generate the two halves of promotion operation. */
3384 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3385 op_type, vec_dest, gsi, stmt);
3386 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3387 op_type, vec_dest, gsi, stmt);
3388 if (is_gimple_call (new_stmt1))
3389 {
3390 new_tmp1 = gimple_call_lhs (new_stmt1);
3391 new_tmp2 = gimple_call_lhs (new_stmt2);
3392 }
3393 else
3394 {
3395 new_tmp1 = gimple_assign_lhs (new_stmt1);
3396 new_tmp2 = gimple_assign_lhs (new_stmt2);
3397 }
3398
3399 /* Store the results for the next step. */
3400 vec_tmp.quick_push (new_tmp1);
3401 vec_tmp.quick_push (new_tmp2);
3402 }
3403
3404 vec_oprnds0->release ();
3405 *vec_oprnds0 = vec_tmp;
3406 }
3407
3408
3409 /* Check if STMT performs a conversion operation that can be vectorized.
3410 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3411 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3412 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3413
3414 static bool
3415 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3416 gimple *vec_stmt, slp_tree slp_node)
3417 {
3418 tree vec_dest;
3419 tree scalar_dest;
3420 tree op0, op1 = NULL_TREE;
3421 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3422 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3424 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3425 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3426 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3427 tree new_temp;
3428 tree def;
3429 gimple def_stmt;
3430 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3431 gimple new_stmt = NULL;
3432 stmt_vec_info prev_stmt_info;
3433 int nunits_in;
3434 int nunits_out;
3435 tree vectype_out, vectype_in;
3436 int ncopies, i, j;
3437 tree lhs_type, rhs_type;
3438 enum { NARROW, NONE, WIDEN } modifier;
3439 vec<tree> vec_oprnds0 = vNULL;
3440 vec<tree> vec_oprnds1 = vNULL;
3441 tree vop0;
3442 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3443 int multi_step_cvt = 0;
3444 vec<tree> vec_dsts = vNULL;
3445 vec<tree> interm_types = vNULL;
3446 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3447 int op_type;
3448 enum machine_mode rhs_mode;
3449 unsigned short fltsz;
3450
3451 /* Is STMT a vectorizable conversion? */
3452
3453 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3454 return false;
3455
3456 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3457 return false;
3458
3459 if (!is_gimple_assign (stmt))
3460 return false;
3461
3462 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3463 return false;
3464
3465 code = gimple_assign_rhs_code (stmt);
3466 if (!CONVERT_EXPR_CODE_P (code)
3467 && code != FIX_TRUNC_EXPR
3468 && code != FLOAT_EXPR
3469 && code != WIDEN_MULT_EXPR
3470 && code != WIDEN_LSHIFT_EXPR)
3471 return false;
3472
3473 op_type = TREE_CODE_LENGTH (code);
3474
3475 /* Check types of lhs and rhs. */
3476 scalar_dest = gimple_assign_lhs (stmt);
3477 lhs_type = TREE_TYPE (scalar_dest);
3478 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3479
3480 op0 = gimple_assign_rhs1 (stmt);
3481 rhs_type = TREE_TYPE (op0);
3482
3483 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3484 && !((INTEGRAL_TYPE_P (lhs_type)
3485 && INTEGRAL_TYPE_P (rhs_type))
3486 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3487 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3488 return false;
3489
3490 if ((INTEGRAL_TYPE_P (lhs_type)
3491 && (TYPE_PRECISION (lhs_type)
3492 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3493 || (INTEGRAL_TYPE_P (rhs_type)
3494 && (TYPE_PRECISION (rhs_type)
3495 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3496 {
3497 if (dump_enabled_p ())
3498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3499 "type conversion to/from bit-precision unsupported."
3500 "\n");
3501 return false;
3502 }
3503
3504 /* Check the operands of the operation. */
3505 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3506 &def_stmt, &def, &dt[0], &vectype_in))
3507 {
3508 if (dump_enabled_p ())
3509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3510 "use not simple.\n");
3511 return false;
3512 }
3513 if (op_type == binary_op)
3514 {
3515 bool ok;
3516
3517 op1 = gimple_assign_rhs2 (stmt);
3518 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3519 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3520 OP1. */
3521 if (CONSTANT_CLASS_P (op0))
3522 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3523 &def_stmt, &def, &dt[1], &vectype_in);
3524 else
3525 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3526 &def, &dt[1]);
3527
3528 if (!ok)
3529 {
3530 if (dump_enabled_p ())
3531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3532 "use not simple.\n");
3533 return false;
3534 }
3535 }
3536
3537 /* If op0 is an external or constant def, use a vector type of
3538 the same size as the output vector type. */
3539 if (!vectype_in)
3540 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3541 if (vec_stmt)
3542 gcc_assert (vectype_in);
3543 if (!vectype_in)
3544 {
3545 if (dump_enabled_p ())
3546 {
3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548 "no vectype for scalar type ");
3549 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3550 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3551 }
3552
3553 return false;
3554 }
3555
3556 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3557 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3558 if (nunits_in < nunits_out)
3559 modifier = NARROW;
3560 else if (nunits_out == nunits_in)
3561 modifier = NONE;
3562 else
3563 modifier = WIDEN;
3564
3565 /* Multiple types in SLP are handled by creating the appropriate number of
3566 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3567 case of SLP. */
3568 if (slp_node || PURE_SLP_STMT (stmt_info))
3569 ncopies = 1;
3570 else if (modifier == NARROW)
3571 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3572 else
3573 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3574
3575 /* Sanity check: make sure that at least one copy of the vectorized stmt
3576 needs to be generated. */
3577 gcc_assert (ncopies >= 1);
3578
3579 /* Supportable by target? */
3580 switch (modifier)
3581 {
3582 case NONE:
3583 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3584 return false;
3585 if (supportable_convert_operation (code, vectype_out, vectype_in,
3586 &decl1, &code1))
3587 break;
3588 /* FALLTHRU */
3589 unsupported:
3590 if (dump_enabled_p ())
3591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3592 "conversion not supported by target.\n");
3593 return false;
3594
3595 case WIDEN:
3596 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3597 &code1, &code2, &multi_step_cvt,
3598 &interm_types))
3599 {
3600 /* A binary widening operation can only be supported directly by the
3601 architecture. */
3602 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3603 break;
3604 }
3605
3606 if (code != FLOAT_EXPR
3607 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3608 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3609 goto unsupported;
3610
3611 rhs_mode = TYPE_MODE (rhs_type);
3612 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3613 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3614 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3615 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3616 {
3617 cvt_type
3618 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3619 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3620 if (cvt_type == NULL_TREE)
3621 goto unsupported;
3622
3623 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3624 {
3625 if (!supportable_convert_operation (code, vectype_out,
3626 cvt_type, &decl1, &codecvt1))
3627 goto unsupported;
3628 }
3629 else if (!supportable_widening_operation (code, stmt, vectype_out,
3630 cvt_type, &codecvt1,
3631 &codecvt2, &multi_step_cvt,
3632 &interm_types))
3633 continue;
3634 else
3635 gcc_assert (multi_step_cvt == 0);
3636
3637 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3638 vectype_in, &code1, &code2,
3639 &multi_step_cvt, &interm_types))
3640 break;
3641 }
3642
3643 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3644 goto unsupported;
3645
3646 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3647 codecvt2 = ERROR_MARK;
3648 else
3649 {
3650 multi_step_cvt++;
3651 interm_types.safe_push (cvt_type);
3652 cvt_type = NULL_TREE;
3653 }
3654 break;
3655
3656 case NARROW:
3657 gcc_assert (op_type == unary_op);
3658 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3659 &code1, &multi_step_cvt,
3660 &interm_types))
3661 break;
3662
3663 if (code != FIX_TRUNC_EXPR
3664 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3665 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3666 goto unsupported;
3667
3668 rhs_mode = TYPE_MODE (rhs_type);
3669 cvt_type
3670 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3671 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3672 if (cvt_type == NULL_TREE)
3673 goto unsupported;
3674 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3675 &decl1, &codecvt1))
3676 goto unsupported;
3677 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3678 &code1, &multi_step_cvt,
3679 &interm_types))
3680 break;
3681 goto unsupported;
3682
3683 default:
3684 gcc_unreachable ();
3685 }
3686
3687 if (!vec_stmt) /* transformation not required. */
3688 {
3689 if (dump_enabled_p ())
3690 dump_printf_loc (MSG_NOTE, vect_location,
3691 "=== vectorizable_conversion ===\n");
3692 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3693 {
3694 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3695 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3696 }
3697 else if (modifier == NARROW)
3698 {
3699 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3700 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3701 }
3702 else
3703 {
3704 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3705 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3706 }
3707 interm_types.release ();
3708 return true;
3709 }
3710
3711 /** Transform. **/
3712 if (dump_enabled_p ())
3713 dump_printf_loc (MSG_NOTE, vect_location,
3714 "transform conversion. ncopies = %d.\n", ncopies);
3715
3716 if (op_type == binary_op)
3717 {
3718 if (CONSTANT_CLASS_P (op0))
3719 op0 = fold_convert (TREE_TYPE (op1), op0);
3720 else if (CONSTANT_CLASS_P (op1))
3721 op1 = fold_convert (TREE_TYPE (op0), op1);
3722 }
3723
3724 /* In case of multi-step conversion, we first generate conversion operations
3725 to the intermediate types, and then from those types to the final one.
3726 We create vector destinations for the intermediate type (TYPES) received
3727 from supportable_*_operation, and store them in the correct order
3728 for future use in vect_create_vectorized_*_stmts (). */
3729 vec_dsts.create (multi_step_cvt + 1);
3730 vec_dest = vect_create_destination_var (scalar_dest,
3731 (cvt_type && modifier == WIDEN)
3732 ? cvt_type : vectype_out);
3733 vec_dsts.quick_push (vec_dest);
3734
3735 if (multi_step_cvt)
3736 {
3737 for (i = interm_types.length () - 1;
3738 interm_types.iterate (i, &intermediate_type); i--)
3739 {
3740 vec_dest = vect_create_destination_var (scalar_dest,
3741 intermediate_type);
3742 vec_dsts.quick_push (vec_dest);
3743 }
3744 }
3745
3746 if (cvt_type)
3747 vec_dest = vect_create_destination_var (scalar_dest,
3748 modifier == WIDEN
3749 ? vectype_out : cvt_type);
3750
3751 if (!slp_node)
3752 {
3753 if (modifier == WIDEN)
3754 {
3755 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3756 if (op_type == binary_op)
3757 vec_oprnds1.create (1);
3758 }
3759 else if (modifier == NARROW)
3760 vec_oprnds0.create (
3761 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3762 }
3763 else if (code == WIDEN_LSHIFT_EXPR)
3764 vec_oprnds1.create (slp_node->vec_stmts_size);
3765
3766 last_oprnd = op0;
3767 prev_stmt_info = NULL;
3768 switch (modifier)
3769 {
3770 case NONE:
3771 for (j = 0; j < ncopies; j++)
3772 {
3773 if (j == 0)
3774 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3775 -1);
3776 else
3777 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3778
3779 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3780 {
3781 /* Arguments are ready, create the new vector stmt. */
3782 if (code1 == CALL_EXPR)
3783 {
3784 new_stmt = gimple_build_call (decl1, 1, vop0);
3785 new_temp = make_ssa_name (vec_dest, new_stmt);
3786 gimple_call_set_lhs (new_stmt, new_temp);
3787 }
3788 else
3789 {
3790 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3791 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3792 vop0, NULL);
3793 new_temp = make_ssa_name (vec_dest, new_stmt);
3794 gimple_assign_set_lhs (new_stmt, new_temp);
3795 }
3796
3797 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3798 if (slp_node)
3799 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3800 }
3801
3802 if (j == 0)
3803 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3804 else
3805 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3806 prev_stmt_info = vinfo_for_stmt (new_stmt);
3807 }
3808 break;
3809
3810 case WIDEN:
3811 /* In case the vectorization factor (VF) is bigger than the number
3812 of elements that we can fit in a vectype (nunits), we have to
3813 generate more than one vector stmt - i.e. - we need to "unroll"
3814 the vector stmt by a factor VF/nunits. */
3815 for (j = 0; j < ncopies; j++)
3816 {
3817 /* Handle uses. */
3818 if (j == 0)
3819 {
3820 if (slp_node)
3821 {
3822 if (code == WIDEN_LSHIFT_EXPR)
3823 {
3824 unsigned int k;
3825
3826 vec_oprnd1 = op1;
3827 /* Store vec_oprnd1 for every vector stmt to be created
3828 for SLP_NODE. We check during the analysis that all
3829 the shift arguments are the same. */
3830 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3831 vec_oprnds1.quick_push (vec_oprnd1);
3832
3833 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3834 slp_node, -1);
3835 }
3836 else
3837 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3838 &vec_oprnds1, slp_node, -1);
3839 }
3840 else
3841 {
3842 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3843 vec_oprnds0.quick_push (vec_oprnd0);
3844 if (op_type == binary_op)
3845 {
3846 if (code == WIDEN_LSHIFT_EXPR)
3847 vec_oprnd1 = op1;
3848 else
3849 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3850 NULL);
3851 vec_oprnds1.quick_push (vec_oprnd1);
3852 }
3853 }
3854 }
3855 else
3856 {
3857 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3858 vec_oprnds0.truncate (0);
3859 vec_oprnds0.quick_push (vec_oprnd0);
3860 if (op_type == binary_op)
3861 {
3862 if (code == WIDEN_LSHIFT_EXPR)
3863 vec_oprnd1 = op1;
3864 else
3865 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3866 vec_oprnd1);
3867 vec_oprnds1.truncate (0);
3868 vec_oprnds1.quick_push (vec_oprnd1);
3869 }
3870 }
3871
3872 /* Arguments are ready. Create the new vector stmts. */
3873 for (i = multi_step_cvt; i >= 0; i--)
3874 {
3875 tree this_dest = vec_dsts[i];
3876 enum tree_code c1 = code1, c2 = code2;
3877 if (i == 0 && codecvt2 != ERROR_MARK)
3878 {
3879 c1 = codecvt1;
3880 c2 = codecvt2;
3881 }
3882 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3883 &vec_oprnds1,
3884 stmt, this_dest, gsi,
3885 c1, c2, decl1, decl2,
3886 op_type);
3887 }
3888
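/* If an extra conversion step through CVT_TYPE was required, convert each
   promoted vector to the final vector type; otherwise the promotion results
   are already in the final type.  */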
3889 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3890 {
3891 if (cvt_type)
3892 {
3893 if (codecvt1 == CALL_EXPR)
3894 {
3895 new_stmt = gimple_build_call (decl1, 1, vop0);
3896 new_temp = make_ssa_name (vec_dest, new_stmt);
3897 gimple_call_set_lhs (new_stmt, new_temp);
3898 }
3899 else
3900 {
3901 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3902 new_temp = make_ssa_name (vec_dest, NULL);
3903 new_stmt = gimple_build_assign_with_ops (codecvt1,
3904 new_temp,
3905 vop0, NULL);
3906 }
3907
3908 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3909 }
3910 else
3911 new_stmt = SSA_NAME_DEF_STMT (vop0);
3912
3913 if (slp_node)
3914 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3915 else
3916 {
3917 if (!prev_stmt_info)
3918 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3919 else
3920 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3921 prev_stmt_info = vinfo_for_stmt (new_stmt);
3922 }
3923 }
3924 }
3925
3926 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3927 break;
3928
3929 case NARROW:
3930 /* In case the vectorization factor (VF) is bigger than the number
3931 of elements that we can fit in a vectype (nunits), we have to
3932 generate more than one vector stmt - i.e. - we need to "unroll"
3933 the vector stmt by a factor VF/nunits. */
3934 for (j = 0; j < ncopies; j++)
3935 {
3936 /* Handle uses. */
3937 if (slp_node)
3938 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3939 slp_node, -1);
3940 else
3941 {
3942 vec_oprnds0.truncate (0);
3943 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3944 vect_pow2 (multi_step_cvt) - 1);
3945 }
3946
3947 /* Arguments are ready. Create the new vector stmts. */
3948 if (cvt_type)
3949 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3950 {
3951 if (codecvt1 == CALL_EXPR)
3952 {
3953 new_stmt = gimple_build_call (decl1, 1, vop0);
3954 new_temp = make_ssa_name (vec_dest, new_stmt);
3955 gimple_call_set_lhs (new_stmt, new_temp);
3956 }
3957 else
3958 {
3959 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3960 new_temp = make_ssa_name (vec_dest, NULL);
3961 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3962 vop0, NULL);
3963 }
3964
3965 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3966 vec_oprnds0[i] = new_temp;
3967 }
3968
3969 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3970 stmt, vec_dsts, gsi,
3971 slp_node, code1,
3972 &prev_stmt_info);
3973 }
3974
3975 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3976 break;
3977 }
3978
3979 vec_oprnds0.release ();
3980 vec_oprnds1.release ();
3981 vec_dsts.release ();
3982 interm_types.release ();
3983
3984 return true;
3985 }
3986
3987
3988 /* Function vectorizable_assignment.
3989
3990 Check if STMT performs an assignment (copy) that can be vectorized.
3991 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3992 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3993 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3994
3995 static bool
3996 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3997 gimple *vec_stmt, slp_tree slp_node)
3998 {
3999 tree vec_dest;
4000 tree scalar_dest;
4001 tree op;
4002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4003 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4004 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4005 tree new_temp;
4006 tree def;
4007 gimple def_stmt;
4008 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4009 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4010 int ncopies;
4011 int i, j;
4012 vec<tree> vec_oprnds = vNULL;
4013 tree vop;
4014 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4015 gimple new_stmt = NULL;
4016 stmt_vec_info prev_stmt_info = NULL;
4017 enum tree_code code;
4018 tree vectype_in;
4019
4020 /* Multiple types in SLP are handled by creating the appropriate number of
4021 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4022 case of SLP. */
4023 if (slp_node || PURE_SLP_STMT (stmt_info))
4024 ncopies = 1;
4025 else
4026 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4027
4028 gcc_assert (ncopies >= 1);
4029
4030 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4031 return false;
4032
4033 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4034 return false;
4035
4036 /* Is vectorizable assignment? */
4037 if (!is_gimple_assign (stmt))
4038 return false;
4039
4040 scalar_dest = gimple_assign_lhs (stmt);
4041 if (TREE_CODE (scalar_dest) != SSA_NAME)
4042 return false;
4043
4044 code = gimple_assign_rhs_code (stmt);
4045 if (gimple_assign_single_p (stmt)
4046 || code == PAREN_EXPR
4047 || CONVERT_EXPR_CODE_P (code))
4048 op = gimple_assign_rhs1 (stmt);
4049 else
4050 return false;
4051
4052 if (code == VIEW_CONVERT_EXPR)
4053 op = TREE_OPERAND (op, 0);
4054
4055 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4056 &def_stmt, &def, &dt[0], &vectype_in))
4057 {
4058 if (dump_enabled_p ())
4059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4060 "use not simple.\n");
4061 return false;
4062 }
4063
4064 /* We can handle NOP_EXPR conversions that do not change the number
4065 of elements or the vector size. */
4066 if ((CONVERT_EXPR_CODE_P (code)
4067 || code == VIEW_CONVERT_EXPR)
4068 && (!vectype_in
4069 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4070 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4071 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4072 return false;
4073
4074 /* We do not handle bit-precision changes. */
4075 if ((CONVERT_EXPR_CODE_P (code)
4076 || code == VIEW_CONVERT_EXPR)
4077 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4078 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4079 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4080 || ((TYPE_PRECISION (TREE_TYPE (op))
4081 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4082 /* But a conversion that does not change the bit-pattern is ok. */
4083 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4084 > TYPE_PRECISION (TREE_TYPE (op)))
4085 && TYPE_UNSIGNED (TREE_TYPE (op))))
4086 {
4087 if (dump_enabled_p ())
4088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4089 "type conversion to/from bit-precision "
4090 "unsupported.\n");
4091 return false;
4092 }
4093
4094 if (!vec_stmt) /* transformation not required. */
4095 {
4096 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4097 if (dump_enabled_p ())
4098 dump_printf_loc (MSG_NOTE, vect_location,
4099 "=== vectorizable_assignment ===\n");
4100 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4101 return true;
4102 }
4103
4104 /** Transform. **/
4105 if (dump_enabled_p ())
4106 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4107
4108 /* Handle def. */
4109 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4110
4111 /* Handle use. */
4112 for (j = 0; j < ncopies; j++)
4113 {
4114 /* Handle uses. */
4115 if (j == 0)
4116 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4117 else
4118 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4119
4120 /* Arguments are ready. Create the new vector stmt. */
4121 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4122 {
4123 if (CONVERT_EXPR_CODE_P (code)
4124 || code == VIEW_CONVERT_EXPR)
4125 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4126 new_stmt = gimple_build_assign (vec_dest, vop);
4127 new_temp = make_ssa_name (vec_dest, new_stmt);
4128 gimple_assign_set_lhs (new_stmt, new_temp);
4129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4130 if (slp_node)
4131 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4132 }
4133
4134 if (slp_node)
4135 continue;
4136
4137 if (j == 0)
4138 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4139 else
4140 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4141
4142 prev_stmt_info = vinfo_for_stmt (new_stmt);
4143 }
4144
4145 vec_oprnds.release ();
4146 return true;
4147 }
4148
4149
4150 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4151 either as shift by a scalar or by a vector. */
4152
4153 bool
4154 vect_supportable_shift (enum tree_code code, tree scalar_type)
4155 {
4156
4157 enum machine_mode vec_mode;
4158 optab optab;
4159 int icode;
4160 tree vectype;
4161
4162 vectype = get_vectype_for_scalar_type (scalar_type);
4163 if (!vectype)
4164 return false;
4165
4166 optab = optab_for_tree_code (code, vectype, optab_scalar);
4167 if (!optab
4168 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4169 {
4170 optab = optab_for_tree_code (code, vectype, optab_vector);
4171 if (!optab
4172 || (optab_handler (optab, TYPE_MODE (vectype))
4173 == CODE_FOR_nothing))
4174 return false;
4175 }
4176
4177 vec_mode = TYPE_MODE (vectype);
4178 icode = (int) optab_handler (optab, vec_mode);
4179 if (icode == CODE_FOR_nothing)
4180 return false;
4181
4182 return true;
4183 }
4184
4185
4186 /* Function vectorizable_shift.
4187
4188 Check if STMT performs a shift operation that can be vectorized.
4189 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4190 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4191 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4192
4193 static bool
4194 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4195 gimple *vec_stmt, slp_tree slp_node)
4196 {
4197 tree vec_dest;
4198 tree scalar_dest;
4199 tree op0, op1 = NULL;
4200 tree vec_oprnd1 = NULL_TREE;
4201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4202 tree vectype;
4203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4204 enum tree_code code;
4205 enum machine_mode vec_mode;
4206 tree new_temp;
4207 optab optab;
4208 int icode;
4209 enum machine_mode optab_op2_mode;
4210 tree def;
4211 gimple def_stmt;
4212 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4213 gimple new_stmt = NULL;
4214 stmt_vec_info prev_stmt_info;
4215 int nunits_in;
4216 int nunits_out;
4217 tree vectype_out;
4218 tree op1_vectype;
4219 int ncopies;
4220 int j, i;
4221 vec<tree> vec_oprnds0 = vNULL;
4222 vec<tree> vec_oprnds1 = vNULL;
4223 tree vop0, vop1;
4224 unsigned int k;
4225 bool scalar_shift_arg = true;
4226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4227 int vf;
4228
4229 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4230 return false;
4231
4232 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4233 return false;
4234
4235 /* Is STMT a vectorizable binary/unary operation? */
4236 if (!is_gimple_assign (stmt))
4237 return false;
4238
4239 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4240 return false;
4241
4242 code = gimple_assign_rhs_code (stmt);
4243
4244 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4245 || code == RROTATE_EXPR))
4246 return false;
4247
4248 scalar_dest = gimple_assign_lhs (stmt);
4249 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4250 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4251 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4252 {
4253 if (dump_enabled_p ())
4254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4255 "bit-precision shifts not supported.\n");
4256 return false;
4257 }
4258
4259 op0 = gimple_assign_rhs1 (stmt);
4260 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4261 &def_stmt, &def, &dt[0], &vectype))
4262 {
4263 if (dump_enabled_p ())
4264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4265 "use not simple.\n");
4266 return false;
4267 }
4268 /* If op0 is an external or constant def, use a vector type with
4269 the same size as the output vector type. */
4270 if (!vectype)
4271 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4272 if (vec_stmt)
4273 gcc_assert (vectype);
4274 if (!vectype)
4275 {
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4278 "no vectype for scalar type\n");
4279 return false;
4280 }
4281
4282 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4283 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4284 if (nunits_out != nunits_in)
4285 return false;
4286
4287 op1 = gimple_assign_rhs2 (stmt);
4288 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4289 &def, &dt[1], &op1_vectype))
4290 {
4291 if (dump_enabled_p ())
4292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4293 "use not simple.\n");
4294 return false;
4295 }
4296
4297 if (loop_vinfo)
4298 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4299 else
4300 vf = 1;
4301
4302 /* Multiple types in SLP are handled by creating the appropriate number of
4303 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4304 case of SLP. */
4305 if (slp_node || PURE_SLP_STMT (stmt_info))
4306 ncopies = 1;
4307 else
4308 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4309
4310 gcc_assert (ncopies >= 1);
4311
4312 /* Determine whether the shift amount is a vector or a scalar. If the
4313 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4314
4315 if (dt[1] == vect_internal_def && !slp_node)
4316 scalar_shift_arg = false;
4317 else if (dt[1] == vect_constant_def
4318 || dt[1] == vect_external_def
4319 || dt[1] == vect_internal_def)
4320 {
4321 /* In SLP, we need to check whether the shift count is the same for
4322 all statements; in loops, a constant or invariant shift count is
4323 always a scalar shift. */
4324 if (slp_node)
4325 {
4326 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4327 gimple slpstmt;
4328
4329 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4330 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4331 scalar_shift_arg = false;
4332 }
4333 }
4334 else
4335 {
4336 if (dump_enabled_p ())
4337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4338 "operand mode requires invariant argument.\n");
4339 return false;
4340 }
4341
4342 /* Vector shifted by vector. */
4343 if (!scalar_shift_arg)
4344 {
4345 optab = optab_for_tree_code (code, vectype, optab_vector);
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location,
4348 "vector/vector shift/rotate found.\n");
4349
4350 if (!op1_vectype)
4351 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4352 if (op1_vectype == NULL_TREE
4353 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4354 {
4355 if (dump_enabled_p ())
4356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4357 "unusable type for last operand in"
4358 " vector/vector shift/rotate.\n");
4359 return false;
4360 }
4361 }
4362 /* See if the machine has a vector shifted by scalar insn and if not
4363 then see if it has a vector shifted by vector insn. */
4364 else
4365 {
4366 optab = optab_for_tree_code (code, vectype, optab_scalar);
4367 if (optab
4368 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4369 {
4370 if (dump_enabled_p ())
4371 dump_printf_loc (MSG_NOTE, vect_location,
4372 "vector/scalar shift/rotate found.\n");
4373 }
4374 else
4375 {
4376 optab = optab_for_tree_code (code, vectype, optab_vector);
4377 if (optab
4378 && (optab_handler (optab, TYPE_MODE (vectype))
4379 != CODE_FOR_nothing))
4380 {
4381 scalar_shift_arg = false;
4382
4383 if (dump_enabled_p ())
4384 dump_printf_loc (MSG_NOTE, vect_location,
4385 "vector/vector shift/rotate found.\n");
4386
4387 /* Unlike the other binary operators, shifts/rotates have
4388 an int rhs rather than one of the same type as the lhs,
4389 so make sure the scalar is the right type if we are
4390 dealing with vectors of long long/long/short/char. */
4391 if (dt[1] == vect_constant_def)
4392 op1 = fold_convert (TREE_TYPE (vectype), op1);
4393 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4394 TREE_TYPE (op1)))
4395 {
4396 if (slp_node
4397 && TYPE_MODE (TREE_TYPE (vectype))
4398 != TYPE_MODE (TREE_TYPE (op1)))
4399 {
4400 if (dump_enabled_p ())
4401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4402 "unusable type for last operand in"
4403 " vector/vector shift/rotate.\n");
4404 return false;
4405 }
4406 if (vec_stmt && !slp_node)
4407 {
4408 op1 = fold_convert (TREE_TYPE (vectype), op1);
4409 op1 = vect_init_vector (stmt, op1,
4410 TREE_TYPE (vectype), NULL);
4411 }
4412 }
4413 }
4414 }
4415 }
4416
4417 /* Supportable by target? */
4418 if (!optab)
4419 {
4420 if (dump_enabled_p ())
4421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4422 "no optab.\n");
4423 return false;
4424 }
4425 vec_mode = TYPE_MODE (vectype);
4426 icode = (int) optab_handler (optab, vec_mode);
4427 if (icode == CODE_FOR_nothing)
4428 {
4429 if (dump_enabled_p ())
4430 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4431 "op not supported by target.\n");
4432 /* Check only during analysis. */
4433 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4434 || (vf < vect_min_worthwhile_factor (code)
4435 && !vec_stmt))
4436 return false;
4437 if (dump_enabled_p ())
4438 dump_printf_loc (MSG_NOTE, vect_location,
4439 "proceeding using word mode.\n");
4440 }
4441
4442 /* Worthwhile without SIMD support? Check only during analysis. */
4443 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4444 && vf < vect_min_worthwhile_factor (code)
4445 && !vec_stmt)
4446 {
4447 if (dump_enabled_p ())
4448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4449 "not worthwhile without SIMD support.\n");
4450 return false;
4451 }
4452
4453 if (!vec_stmt) /* transformation not required. */
4454 {
4455 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4456 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_NOTE, vect_location,
4458 "=== vectorizable_shift ===\n");
4459 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4460 return true;
4461 }
4462
4463 /** Transform. **/
4464
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_NOTE, vect_location,
4467 "transform binary/unary operation.\n");
4468
4469 /* Handle def. */
4470 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4471
4472 prev_stmt_info = NULL;
4473 for (j = 0; j < ncopies; j++)
4474 {
4475 /* Handle uses. */
4476 if (j == 0)
4477 {
4478 if (scalar_shift_arg)
4479 {
4480 /* Vector shl and shr insn patterns can be defined with scalar
4481 operand 2 (shift operand). In this case, use constant or loop
4482 invariant op1 directly, without extending it to vector mode
4483 first. */
4484 optab_op2_mode = insn_data[icode].operand[2].mode;
4485 if (!VECTOR_MODE_P (optab_op2_mode))
4486 {
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_NOTE, vect_location,
4489 "operand 1 using scalar mode.\n");
4490 vec_oprnd1 = op1;
4491 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4492 vec_oprnds1.quick_push (vec_oprnd1);
4493 if (slp_node)
4494 {
4495 /* Store vec_oprnd1 for every vector stmt to be created
4496 for SLP_NODE. We check during the analysis that all
4497 the shift arguments are the same.
4498 TODO: Allow different constants for different vector
4499 stmts generated for an SLP instance. */
4500 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4501 vec_oprnds1.quick_push (vec_oprnd1);
4502 }
4503 }
4504 }
4505
4506 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4507 (a special case for certain kinds of vector shifts); otherwise,
4508 operand 1 should be of a vector type (the usual case). */
4509 if (vec_oprnd1)
4510 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4511 slp_node, -1);
4512 else
4513 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4514 slp_node, -1);
4515 }
4516 else
4517 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4518
4519 /* Arguments are ready. Create the new vector stmt. */
4520 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4521 {
4522 vop1 = vec_oprnds1[i];
4523 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4524 new_temp = make_ssa_name (vec_dest, new_stmt);
4525 gimple_assign_set_lhs (new_stmt, new_temp);
4526 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4527 if (slp_node)
4528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4529 }
4530
4531 if (slp_node)
4532 continue;
4533
4534 if (j == 0)
4535 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4536 else
4537 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4538 prev_stmt_info = vinfo_for_stmt (new_stmt);
4539 }
4540
4541 vec_oprnds0.release ();
4542 vec_oprnds1.release ();
4543
4544 return true;
4545 }
4546
4547
4548 /* Function vectorizable_operation.
4549
4550 Check if STMT performs a binary, unary or ternary operation that can
4551 be vectorized.
4552 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4553 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4554 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4555
4556 static bool
4557 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4558 gimple *vec_stmt, slp_tree slp_node)
4559 {
4560 tree vec_dest;
4561 tree scalar_dest;
4562 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4563 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4564 tree vectype;
4565 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4566 enum tree_code code;
4567 enum machine_mode vec_mode;
4568 tree new_temp;
4569 int op_type;
4570 optab optab;
4571 int icode;
4572 tree def;
4573 gimple def_stmt;
4574 enum vect_def_type dt[3]
4575 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4576 gimple new_stmt = NULL;
4577 stmt_vec_info prev_stmt_info;
4578 int nunits_in;
4579 int nunits_out;
4580 tree vectype_out;
4581 int ncopies;
4582 int j, i;
4583 vec<tree> vec_oprnds0 = vNULL;
4584 vec<tree> vec_oprnds1 = vNULL;
4585 vec<tree> vec_oprnds2 = vNULL;
4586 tree vop0, vop1, vop2;
4587 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4588 int vf;
4589
4590 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4591 return false;
4592
4593 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4594 return false;
4595
4596 /* Is STMT a vectorizable binary/unary operation? */
4597 if (!is_gimple_assign (stmt))
4598 return false;
4599
4600 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4601 return false;
4602
4603 code = gimple_assign_rhs_code (stmt);
4604
4605 /* For pointer addition, we should use the normal plus for
4606 the vector addition. */
4607 if (code == POINTER_PLUS_EXPR)
4608 code = PLUS_EXPR;
4609
4610 /* Support only unary or binary operations. */
4611 op_type = TREE_CODE_LENGTH (code);
4612 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4613 {
4614 if (dump_enabled_p ())
4615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4616 "num. args = %d (not unary/binary/ternary op).\n",
4617 op_type);
4618 return false;
4619 }
4620
4621 scalar_dest = gimple_assign_lhs (stmt);
4622 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4623
4624 /* Most operations cannot handle bit-precision types without extra
4625 truncations. */
4626 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4627 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4628 /* Exceptions are bitwise binary operations. */
4629 && code != BIT_IOR_EXPR
4630 && code != BIT_XOR_EXPR
4631 && code != BIT_AND_EXPR)
4632 {
4633 if (dump_enabled_p ())
4634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4635 "bit-precision arithmetic not supported.\n");
4636 return false;
4637 }
4638
4639 op0 = gimple_assign_rhs1 (stmt);
4640 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4641 &def_stmt, &def, &dt[0], &vectype))
4642 {
4643 if (dump_enabled_p ())
4644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4645 "use not simple.\n");
4646 return false;
4647 }
4648 /* If op0 is an external or constant def, use a vector type with
4649 the same size as the output vector type. */
4650 if (!vectype)
4651 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4652 if (vec_stmt)
4653 gcc_assert (vectype);
4654 if (!vectype)
4655 {
4656 if (dump_enabled_p ())
4657 {
4658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4659 "no vectype for scalar type ");
4660 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4661 TREE_TYPE (op0));
4662 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4663 }
4664
4665 return false;
4666 }
4667
4668 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4669 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4670 if (nunits_out != nunits_in)
4671 return false;
4672
4673 if (op_type == binary_op || op_type == ternary_op)
4674 {
4675 op1 = gimple_assign_rhs2 (stmt);
4676 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4677 &def, &dt[1]))
4678 {
4679 if (dump_enabled_p ())
4680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4681 "use not simple.\n");
4682 return false;
4683 }
4684 }
4685 if (op_type == ternary_op)
4686 {
4687 op2 = gimple_assign_rhs3 (stmt);
4688 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4689 &def, &dt[2]))
4690 {
4691 if (dump_enabled_p ())
4692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4693 "use not simple.\n");
4694 return false;
4695 }
4696 }
4697
4698 if (loop_vinfo)
4699 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4700 else
4701 vf = 1;
4702
4703 /* Multiple types in SLP are handled by creating the appropriate number of
4704 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4705 case of SLP. */
4706 if (slp_node || PURE_SLP_STMT (stmt_info))
4707 ncopies = 1;
4708 else
4709 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4710
4711 gcc_assert (ncopies >= 1);
4712
4713 /* Shifts are handled in vectorizable_shift (). */
4714 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4715 || code == RROTATE_EXPR)
4716 return false;
4717
4718 /* Supportable by target? */
4719
4720 vec_mode = TYPE_MODE (vectype);
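/* MULT_HIGHPART_EXPR is queried through can_mult_highpart_p rather than
   through a single optab lookup; LAST_INSN_CODE is used here only as a
   marker meaning 'supported'.  */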
4721 if (code == MULT_HIGHPART_EXPR)
4722 {
4723 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4724 icode = LAST_INSN_CODE;
4725 else
4726 icode = CODE_FOR_nothing;
4727 }
4728 else
4729 {
4730 optab = optab_for_tree_code (code, vectype, optab_default);
4731 if (!optab)
4732 {
4733 if (dump_enabled_p ())
4734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4735 "no optab.\n");
4736 return false;
4737 }
4738 icode = (int) optab_handler (optab, vec_mode);
4739 }
4740
4741 if (icode == CODE_FOR_nothing)
4742 {
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4745 "op not supported by target.\n");
4746 /* Check only during analysis. */
4747 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4748 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4749 return false;
4750 if (dump_enabled_p ())
4751 dump_printf_loc (MSG_NOTE, vect_location,
4752 "proceeding using word mode.\n");
4753 }
4754
4755 /* Worthwhile without SIMD support? Check only during analysis. */
4756 if (!VECTOR_MODE_P (vec_mode)
4757 && !vec_stmt
4758 && vf < vect_min_worthwhile_factor (code))
4759 {
4760 if (dump_enabled_p ())
4761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4762 "not worthwhile without SIMD support.\n");
4763 return false;
4764 }
4765
4766 if (!vec_stmt) /* transformation not required. */
4767 {
4768 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4769 if (dump_enabled_p ())
4770 dump_printf_loc (MSG_NOTE, vect_location,
4771 "=== vectorizable_operation ===\n");
4772 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4773 return true;
4774 }
4775
4776 /** Transform. **/
4777
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_NOTE, vect_location,
4780 "transform binary/unary operation.\n");
4781
4782 /* Handle def. */
4783 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4784
4785 /* In case the vectorization factor (VF) is bigger than the number
4786 of elements that we can fit in a vectype (nunits), we have to generate
4787 more than one vector stmt - i.e. - we need to "unroll" the
4788 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4789 from one copy of the vector stmt to the next, in the field
4790 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4791 stages to find the correct vector defs to be used when vectorizing
4792 stmts that use the defs of the current stmt. The example below
4793 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4794 we need to create 4 vectorized stmts):
4795
4796 before vectorization:
4797 RELATED_STMT VEC_STMT
4798 S1: x = memref - -
4799 S2: z = x + 1 - -
4800
4801 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4802 there):
4803 RELATED_STMT VEC_STMT
4804 VS1_0: vx0 = memref0 VS1_1 -
4805 VS1_1: vx1 = memref1 VS1_2 -
4806 VS1_2: vx2 = memref2 VS1_3 -
4807 VS1_3: vx3 = memref3 - -
4808 S1: x = load - VS1_0
4809 S2: z = x + 1 - -
4810
4811 step2: vectorize stmt S2 (done here):
4812 To vectorize stmt S2 we first need to find the relevant vector
4813 def for the first operand 'x'. This is, as usual, obtained from
4814 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4815 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4816 relevant vector def 'vx0'. Having found 'vx0' we can generate
4817 the vector stmt VS2_0, and as usual, record it in the
4818 STMT_VINFO_VEC_STMT of stmt S2.
4819 When creating the second copy (VS2_1), we obtain the relevant vector
4820 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4821 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4822 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4823 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4824 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4825 chain of stmts and pointers:
4826 RELATED_STMT VEC_STMT
4827 VS1_0: vx0 = memref0 VS1_1 -
4828 VS1_1: vx1 = memref1 VS1_2 -
4829 VS1_2: vx2 = memref2 VS1_3 -
4830 VS1_3: vx3 = memref3 - -
4831 S1: x = load - VS1_0
4832 VS2_0: vz0 = vx0 + v1 VS2_1 -
4833 VS2_1: vz1 = vx1 + v1 VS2_2 -
4834 VS2_2: vz2 = vx2 + v1 VS2_3 -
4835 VS2_3: vz3 = vx3 + v1 - -
4836 S2: z = x + 1 - VS2_0 */
4837
4838 prev_stmt_info = NULL;
4839 for (j = 0; j < ncopies; j++)
4840 {
4841 /* Handle uses. */
4842 if (j == 0)
4843 {
4844 if (op_type == binary_op || op_type == ternary_op)
4845 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4846 slp_node, -1);
4847 else
4848 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4849 slp_node, -1);
4850 if (op_type == ternary_op)
4851 {
4852 vec_oprnds2.create (1);
4853 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4854 stmt,
4855 NULL));
4856 }
4857 }
4858 else
4859 {
4860 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
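/* vect_get_vec_defs_for_stmt_copy only advances the first two operands;
   for a ternary operation the third operand is advanced by hand.  */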
4861 if (op_type == ternary_op)
4862 {
4863 tree vec_oprnd = vec_oprnds2.pop ();
4864 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4865 vec_oprnd));
4866 }
4867 }
4868
4869 /* Arguments are ready. Create the new vector stmt. */
4870 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4871 {
4872 vop1 = ((op_type == binary_op || op_type == ternary_op)
4873 ? vec_oprnds1[i] : NULL_TREE);
4874 vop2 = ((op_type == ternary_op)
4875 ? vec_oprnds2[i] : NULL_TREE);
4876 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4877 vop0, vop1, vop2);
4878 new_temp = make_ssa_name (vec_dest, new_stmt);
4879 gimple_assign_set_lhs (new_stmt, new_temp);
4880 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4881 if (slp_node)
4882 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4883 }
4884
4885 if (slp_node)
4886 continue;
4887
4888 if (j == 0)
4889 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4890 else
4891 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4892 prev_stmt_info = vinfo_for_stmt (new_stmt);
4893 }
4894
4895 vec_oprnds0.release ();
4896 vec_oprnds1.release ();
4897 vec_oprnds2.release ();
4898
4899 return true;
4900 }
4901
4902 /* A helper function to ensure data reference DR's base alignment
4903 for STMT_INFO. */
4904
4905 static void
4906 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4907 {
4908 if (!dr->aux)
4909 return;
4910
4911 if (((dataref_aux *)dr->aux)->base_misaligned)
4912 {
4913 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4914 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4915
4916 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4917 DECL_USER_ALIGN (base_decl) = 1;
4918 ((dataref_aux *)dr->aux)->base_misaligned = false;
4919 }
4920 }
4921
4922
4923 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4924 reversal of the vector elements. If that is impossible to do,
4925 returns NULL. */
4926
4927 static tree
4928 perm_mask_for_reverse (tree vectype)
4929 {
4930 int i, nunits;
4931 unsigned char *sel;
4932
4933 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4934 sel = XALLOCAVEC (unsigned char, nunits);
4935
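/* Build the selector { nunits-1, ..., 1, 0 }, i.e. the permutation that
   reverses the order of the vector elements.  */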
4936 for (i = 0; i < nunits; ++i)
4937 sel[i] = nunits - 1 - i;
4938
4939 return vect_gen_perm_mask (vectype, sel);
4940 }
4941
4942 /* Function vectorizable_store.
4943
4944 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4945 can be vectorized.
4946 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4947 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4948 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4949
4950 static bool
4951 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4952 slp_tree slp_node)
4953 {
4954 tree scalar_dest;
4955 tree data_ref;
4956 tree op;
4957 tree vec_oprnd = NULL_TREE;
4958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4959 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4960 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4961 tree elem_type;
4962 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4963 struct loop *loop = NULL;
4964 enum machine_mode vec_mode;
4965 tree dummy;
4966 enum dr_alignment_support alignment_support_scheme;
4967 tree def;
4968 gimple def_stmt;
4969 enum vect_def_type dt;
4970 stmt_vec_info prev_stmt_info = NULL;
4971 tree dataref_ptr = NULL_TREE;
4972 tree dataref_offset = NULL_TREE;
4973 gimple ptr_incr = NULL;
4974 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4975 int ncopies;
4976 int j;
4977 gimple next_stmt, first_stmt = NULL;
4978 bool grouped_store = false;
4979 bool store_lanes_p = false;
4980 unsigned int group_size, i;
4981 vec<tree> dr_chain = vNULL;
4982 vec<tree> oprnds = vNULL;
4983 vec<tree> result_chain = vNULL;
4984 bool inv_p;
4985 bool negative = false;
4986 tree offset = NULL_TREE;
4987 vec<tree> vec_oprnds = vNULL;
4988 bool slp = (slp_node != NULL);
4989 unsigned int vec_num;
4990 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4991 tree aggr_type;
4992
4993 if (loop_vinfo)
4994 loop = LOOP_VINFO_LOOP (loop_vinfo);
4995
4996 /* Multiple types in SLP are handled by creating the appropriate number of
4997 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4998 case of SLP. */
4999 if (slp || PURE_SLP_STMT (stmt_info))
5000 ncopies = 1;
5001 else
5002 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5003
5004 gcc_assert (ncopies >= 1);
5005
5006 /* FORNOW. This restriction should be relaxed. */
5007 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5008 {
5009 if (dump_enabled_p ())
5010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5011 "multiple types in nested loop.\n");
5012 return false;
5013 }
5014
5015 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5016 return false;
5017
5018 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5019 return false;
5020
5021 /* Is vectorizable store? */
5022
5023 if (!is_gimple_assign (stmt))
5024 return false;
5025
5026 scalar_dest = gimple_assign_lhs (stmt);
5027 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5028 && is_pattern_stmt_p (stmt_info))
5029 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5030 if (TREE_CODE (scalar_dest) != ARRAY_REF
5031 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5032 && TREE_CODE (scalar_dest) != INDIRECT_REF
5033 && TREE_CODE (scalar_dest) != COMPONENT_REF
5034 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5035 && TREE_CODE (scalar_dest) != REALPART_EXPR
5036 && TREE_CODE (scalar_dest) != MEM_REF)
5037 return false;
5038
5039 gcc_assert (gimple_assign_single_p (stmt));
5040 op = gimple_assign_rhs1 (stmt);
5041 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5042 &def, &dt))
5043 {
5044 if (dump_enabled_p ())
5045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5046 "use not simple.\n");
5047 return false;
5048 }
5049
5050 elem_type = TREE_TYPE (vectype);
5051 vec_mode = TYPE_MODE (vectype);
5052
5053 /* FORNOW. In some cases we can vectorize even if the data type is not
5054 supported (e.g. array initialization with 0). */
5055 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5056 return false;
5057
5058 if (!STMT_VINFO_DATA_REF (stmt_info))
5059 return false;
5060
5061 negative =
5062 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5063 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5064 size_zero_node) < 0;
5065 if (negative && ncopies > 1)
5066 {
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "multiple types with negative step.\n");
5070 return false;
5071 }
5072
5073 if (negative)
5074 {
5075 gcc_assert (!grouped_store);
5076 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5077 if (alignment_support_scheme != dr_aligned
5078 && alignment_support_scheme != dr_unaligned_supported)
5079 {
5080 if (dump_enabled_p ())
5081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5082 "negative step but alignment required.\n");
5083 return false;
5084 }
5085 if (dt != vect_constant_def
5086 && dt != vect_external_def
5087 && !perm_mask_for_reverse (vectype))
5088 {
5089 if (dump_enabled_p ())
5090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5091 "negative step and reversing not supported.\n");
5092 return false;
5093 }
5094 }
5095
5096 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5097 {
5098 grouped_store = true;
5099 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5100 if (!slp && !PURE_SLP_STMT (stmt_info))
5101 {
5102 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
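/* Prefer the target's store-lanes instructions when available; otherwise
   fall back to interleaving the stores with permutes, and fail if neither
   scheme is supported for this group size.  */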
5103 if (vect_store_lanes_supported (vectype, group_size))
5104 store_lanes_p = true;
5105 else if (!vect_grouped_store_supported (vectype, group_size))
5106 return false;
5107 }
5108
5109 if (first_stmt == stmt)
5110 {
5111 /* STMT is the leader of the group. Check the operands of all the
5112 stmts of the group. */
5113 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5114 while (next_stmt)
5115 {
5116 gcc_assert (gimple_assign_single_p (next_stmt));
5117 op = gimple_assign_rhs1 (next_stmt);
5118 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5119 &def_stmt, &def, &dt))
5120 {
5121 if (dump_enabled_p ())
5122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5123 "use not simple.\n");
5124 return false;
5125 }
5126 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5127 }
5128 }
5129 }
5130
5131 if (!vec_stmt) /* transformation not required. */
5132 {
5133 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5134 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5135 NULL, NULL, NULL);
5136 return true;
5137 }
5138
5139 /** Transform. **/
5140
5141 ensure_base_align (stmt_info, dr);
5142
5143 if (grouped_store)
5144 {
5145 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5146 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5147
5148 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5149
5150 /* FORNOW */
5151 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5152
5153 /* We vectorize all the stmts of the interleaving group when we
5154 reach the last stmt in the group. */
5155 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5156 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5157 && !slp)
5158 {
5159 *vec_stmt = NULL;
5160 return true;
5161 }
5162
5163 if (slp)
5164 {
5165 grouped_store = false;
5166 /* VEC_NUM is the number of vect stmts to be created for this
5167 group. */
5168 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5169 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5170 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5171 op = gimple_assign_rhs1 (first_stmt);
5172 }
5173 else
5174 /* VEC_NUM is the number of vect stmts to be created for this
5175 group. */
5176 vec_num = group_size;
5177 }
5178 else
5179 {
5180 first_stmt = stmt;
5181 first_dr = dr;
5182 group_size = vec_num = 1;
5183 }
5184
5185 if (dump_enabled_p ())
5186 dump_printf_loc (MSG_NOTE, vect_location,
5187 "transform store. ncopies = %d\n", ncopies);
5188
5189 dr_chain.create (group_size);
5190 oprnds.create (group_size);
5191
5192 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5193 gcc_assert (alignment_support_scheme);
5194 /* Targets with store-lane instructions must not require explicit
5195 realignment. */
5196 gcc_assert (!store_lanes_p
5197 || alignment_support_scheme == dr_aligned
5198 || alignment_support_scheme == dr_unaligned_supported);
5199
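/* A negative-step store covers the addresses below the original scalar
   access, so bias the data reference pointer down by NUNITS - 1 elements.  */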
5200 if (negative)
5201 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5202
5203 if (store_lanes_p)
5204 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5205 else
5206 aggr_type = vectype;
5207
5208 /* In case the vectorization factor (VF) is bigger than the number
5209 of elements that we can fit in a vectype (nunits), we have to generate
5210 more than one vector stmt - i.e. - we need to "unroll" the
5211 vector stmt by a factor VF/nunits. For more details see documentation in
5212 vect_get_vec_def_for_copy_stmt. */
5213
5214 /* In case of interleaving (non-unit grouped access):
5215
5216 S1: &base + 2 = x2
5217 S2: &base = x0
5218 S3: &base + 1 = x1
5219 S4: &base + 3 = x3
5220
5221 We create vectorized stores starting from base address (the access of the
5222 first stmt in the chain (S2 in the above example)), when the last store stmt
5223 of the chain (S4) is reached:
5224
5225 VS1: &base = vx2
5226 VS2: &base + vec_size*1 = vx0
5227 VS3: &base + vec_size*2 = vx1
5228 VS4: &base + vec_size*3 = vx3
5229
5230 Then permutation statements are generated:
5231
5232 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5233 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5234 ...
5235
5236 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5237 (the order of the data-refs in the output of vect_permute_store_chain
5238 corresponds to the order of scalar stmts in the interleaving chain - see
5239 the documentation of vect_permute_store_chain()).
5240
5241 In case of both multiple types and interleaving, above vector stores and
5242 permutation stmts are created for every copy. The result vector stmts are
5243 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5244 STMT_VINFO_RELATED_STMT for the next copies.
5245 */
5246
5247 prev_stmt_info = NULL;
5248 for (j = 0; j < ncopies; j++)
5249 {
5250 gimple new_stmt;
5251
5252 if (j == 0)
5253 {
5254 if (slp)
5255 {
5256 /* Get vectorized arguments for SLP_NODE. */
5257 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5258 NULL, slp_node, -1);
5259
5260 vec_oprnd = vec_oprnds[0];
5261 }
5262 else
5263 {
5264 /* For interleaved stores we collect vectorized defs for all the
5265 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5266 used as an input to vect_permute_store_chain(), and OPRNDS as
5267 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5268
5269 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5270 OPRNDS are of size 1. */
5271 next_stmt = first_stmt;
5272 for (i = 0; i < group_size; i++)
5273 {
5274 /* Since gaps are not supported for interleaved stores,
5275 GROUP_SIZE is the exact number of stmts in the chain.
5276 Therefore, NEXT_STMT can't be NULL_TREE. If there is
5277 no interleaving, GROUP_SIZE is 1, and only one
5278 iteration of the loop will be executed. */
5279 gcc_assert (next_stmt
5280 && gimple_assign_single_p (next_stmt));
5281 op = gimple_assign_rhs1 (next_stmt);
5282
5283 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5284 NULL);
5285 dr_chain.quick_push (vec_oprnd);
5286 oprnds.quick_push (vec_oprnd);
5287 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5288 }
5289 }
5290
5291 /* We should have caught mismatched types earlier. */
5292 gcc_assert (useless_type_conversion_p (vectype,
5293 TREE_TYPE (vec_oprnd)));
5294 bool simd_lane_access_p
5295 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
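/* For a simd-lane access to a local array with a constant base and zero
   offset we can use the array's address directly and step through it with
   constant offsets instead of maintaining a pointer induction variable.  */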
5296 if (simd_lane_access_p
5297 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5298 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5299 && integer_zerop (DR_OFFSET (first_dr))
5300 && integer_zerop (DR_INIT (first_dr))
5301 && alias_sets_conflict_p (get_alias_set (aggr_type),
5302 get_alias_set (DR_REF (first_dr))))
5303 {
5304 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5305 dataref_offset = build_int_cst (reference_alias_ptr_type
5306 (DR_REF (first_dr)), 0);
5307 inv_p = false;
5308 }
5309 else
5310 dataref_ptr
5311 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5312 simd_lane_access_p ? loop : NULL,
5313 offset, &dummy, gsi, &ptr_incr,
5314 simd_lane_access_p, &inv_p);
5315 gcc_assert (bb_vinfo || !inv_p);
5316 }
5317 else
5318 {
5319 /* For interleaved stores we created vectorized defs for all the
5320 defs stored in OPRNDS in the previous iteration (previous copy).
5321 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5322 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5323 next copy.
5324 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5325 OPRNDS are of size 1. */
5326 for (i = 0; i < group_size; i++)
5327 {
5328 op = oprnds[i];
5329 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5330 &def, &dt);
5331 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5332 dr_chain[i] = vec_oprnd;
5333 oprnds[i] = vec_oprnd;
5334 }
5335 if (dataref_offset)
5336 dataref_offset
5337 = int_const_binop (PLUS_EXPR, dataref_offset,
5338 TYPE_SIZE_UNIT (aggr_type));
5339 else
5340 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5341 TYPE_SIZE_UNIT (aggr_type));
5342 }
5343
5344 if (store_lanes_p)
5345 {
5346 tree vec_array;
5347
5348 /* Combine all the vectors into an array. */
5349 vec_array = create_vector_array (vectype, vec_num);
5350 for (i = 0; i < vec_num; i++)
5351 {
5352 vec_oprnd = dr_chain[i];
5353 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5354 }
5355
5356 /* Emit:
5357 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5358 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5359 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5360 gimple_call_set_lhs (new_stmt, data_ref);
5361 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5362 }
5363 else
5364 {
5365 new_stmt = NULL;
5366 if (grouped_store)
5367 {
5368 if (j == 0)
5369 result_chain.create (group_size);
5370 /* Permute. */
5371 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5372 &result_chain);
5373 }
5374
5375 next_stmt = first_stmt;
5376 for (i = 0; i < vec_num; i++)
5377 {
5378 unsigned align, misalign;
5379
5380 if (i > 0)
5381 /* Bump the vector pointer. */
5382 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5383 stmt, NULL_TREE);
5384
5385 if (slp)
5386 vec_oprnd = vec_oprnds[i];
5387 else if (grouped_store)
5388 /* For grouped stores vectorized defs are interleaved in
5389 vect_permute_store_chain(). */
5390 vec_oprnd = result_chain[i];
5391
5392 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5393 dataref_offset
5394 ? dataref_offset
5395 : build_int_cst (reference_alias_ptr_type
5396 (DR_REF (first_dr)), 0));
5397 align = TYPE_ALIGN_UNIT (vectype);
5398 if (aligned_access_p (first_dr))
5399 misalign = 0;
5400 else if (DR_MISALIGNMENT (first_dr) == -1)
5401 {
5402 TREE_TYPE (data_ref)
5403 = build_aligned_type (TREE_TYPE (data_ref),
5404 TYPE_ALIGN (elem_type));
5405 align = TYPE_ALIGN_UNIT (elem_type);
5406 misalign = 0;
5407 }
5408 else
5409 {
5410 TREE_TYPE (data_ref)
5411 = build_aligned_type (TREE_TYPE (data_ref),
5412 TYPE_ALIGN (elem_type));
5413 misalign = DR_MISALIGNMENT (first_dr);
5414 }
5415 if (dataref_offset == NULL_TREE)
5416 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5417 misalign);
5418
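/* A negative DR step means the scalar stores walk backwards through
memory, so reverse the vector with a VEC_PERM_EXPR before emitting the
contiguous vector store below.  A vector built from an invariant
(constant or external) operand has identical lanes and needs no
reversing.  */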
5419 if (negative
5420 && dt != vect_constant_def
5421 && dt != vect_external_def)
5422 {
5423 tree perm_mask = perm_mask_for_reverse (vectype);
5424 tree perm_dest
5425 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5426 vectype);
5427 tree new_temp = make_ssa_name (perm_dest, NULL);
5428
5429 /* Generate the permute statement. */
5430 gimple perm_stmt
5431 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5432 vec_oprnd, vec_oprnd,
5433 perm_mask);
5434 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5435
5436 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5437 vec_oprnd = new_temp;
5438 }
5439
5440 /* Arguments are ready. Create the new vector stmt. */
5441 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5442 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5443
5444 if (slp)
5445 continue;
5446
5447 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5448 if (!next_stmt)
5449 break;
5450 }
5451 }
5452 if (!slp)
5453 {
5454 if (j == 0)
5455 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5456 else
5457 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5458 prev_stmt_info = vinfo_for_stmt (new_stmt);
5459 }
5460 }
5461
5462 dr_chain.release ();
5463 oprnds.release ();
5464 result_chain.release ();
5465 vec_oprnds.release ();
5466
5467 return true;
5468 }
5469
5470 /* Given a vector type VECTYPE and permutation SEL returns
5471 the VECTOR_CST mask that implements the permutation of the
5472 vector elements. If that is impossible to do, returns NULL. */
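/* For example, for a four-element vector type and SEL = {3, 2, 1, 0}
(a purely illustrative reversing selection), the result is the
VECTOR_CST {3, 2, 1, 0} of the matching integer vector type, ready to
be used as the third operand of a VEC_PERM_EXPR.  */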
5473
5474 tree
5475 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5476 {
5477 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5478 int i, nunits;
5479
5480 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5481
5482 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5483 return NULL;
5484
5485 mask_elt_type = lang_hooks.types.type_for_mode
5486 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5487 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5488
5489 mask_elts = XALLOCAVEC (tree, nunits);
5490 for (i = nunits - 1; i >= 0; i--)
5491 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5492 mask_vec = build_vector (mask_type, mask_elts);
5493
5494 return mask_vec;
5495 }
5496
5497 /* Given vector variables X and Y that were generated for the scalar
5498 STMT, generate instructions to permute the vector elements of X and Y
5499 using permutation mask MASK_VEC, insert them at *GSI and return the
5500 permuted vector variable. */
5501
5502 static tree
5503 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5504 gimple_stmt_iterator *gsi)
5505 {
5506 tree vectype = TREE_TYPE (x);
5507 tree perm_dest, data_ref;
5508 gimple perm_stmt;
5509
5510 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5511 data_ref = make_ssa_name (perm_dest, NULL);
5512
5513 /* Generate the permute statement. */
5514 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5515 x, y, mask_vec);
5516 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5517
5518 return data_ref;
5519 }
5520
5521 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5522 inserting them on the loop's preheader edge.  Returns true if we
5523 were successful in doing so (and thus STMT can then be moved),
5524 otherwise returns false. */
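/* As an illustrative sketch (the SSA names below are hypothetical):
if STMT is  _8 = *p_7  and p_7 is defined inside LOOP by
p_7 = base_5 + off_3  with base_5 and off_3 both defined outside LOOP,
the definition of p_7 is moved to the preheader.  If p_7 were defined
by a PHI, or depended on another name defined inside LOOP, the
function would return false without moving anything.  */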
5525
5526 static bool
5527 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5528 {
5529 ssa_op_iter i;
5530 tree op;
5531 bool any = false;
5532
5533 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5534 {
5535 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5536 if (!gimple_nop_p (def_stmt)
5537 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5538 {
5539 /* Make sure we don't need to recurse.  While we could do
5540 so in simple cases, when there are more complex use webs
5541 we don't have an easy way to preserve stmt order to fulfil
5542 dependencies within them. */
5543 tree op2;
5544 ssa_op_iter i2;
5545 if (gimple_code (def_stmt) == GIMPLE_PHI)
5546 return false;
5547 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5548 {
5549 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5550 if (!gimple_nop_p (def_stmt2)
5551 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5552 return false;
5553 }
5554 any = true;
5555 }
5556 }
5557
5558 if (!any)
5559 return true;
5560
5561 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5562 {
5563 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5564 if (!gimple_nop_p (def_stmt)
5565 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5566 {
5567 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5568 gsi_remove (&gsi, false);
5569 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5570 }
5571 }
5572
5573 return true;
5574 }
5575
5576 /* vectorizable_load.
5577
5578 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5579 can be vectorized.
5580 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5581 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5582 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5583
5584 static bool
5585 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5586 slp_tree slp_node, slp_instance slp_node_instance)
5587 {
5588 tree scalar_dest;
5589 tree vec_dest = NULL;
5590 tree data_ref = NULL;
5591 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5592 stmt_vec_info prev_stmt_info;
5593 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5594 struct loop *loop = NULL;
5595 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5596 bool nested_in_vect_loop = false;
5597 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5598 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5599 tree elem_type;
5600 tree new_temp;
5601 enum machine_mode mode;
5602 gimple new_stmt = NULL;
5603 tree dummy;
5604 enum dr_alignment_support alignment_support_scheme;
5605 tree dataref_ptr = NULL_TREE;
5606 tree dataref_offset = NULL_TREE;
5607 gimple ptr_incr = NULL;
5608 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5609 int ncopies;
5610 int i, j, group_size, group_gap;
5611 tree msq = NULL_TREE, lsq;
5612 tree offset = NULL_TREE;
5613 tree byte_offset = NULL_TREE;
5614 tree realignment_token = NULL_TREE;
5615 gimple phi = NULL;
5616 vec<tree> dr_chain = vNULL;
5617 bool grouped_load = false;
5618 bool load_lanes_p = false;
5619 gimple first_stmt;
5620 bool inv_p;
5621 bool negative = false;
5622 bool compute_in_loop = false;
5623 struct loop *at_loop;
5624 int vec_num;
5625 bool slp = (slp_node != NULL);
5626 bool slp_perm = false;
5627 enum tree_code code;
5628 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5629 int vf;
5630 tree aggr_type;
5631 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5632 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5633 int gather_scale = 1;
5634 enum vect_def_type gather_dt = vect_unknown_def_type;
5635
5636 if (loop_vinfo)
5637 {
5638 loop = LOOP_VINFO_LOOP (loop_vinfo);
5639 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5640 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5641 }
5642 else
5643 vf = 1;
5644
5645 /* Multiple types in SLP are handled by creating the appropriate number of
5646 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5647 case of SLP. */
5648 if (slp || PURE_SLP_STMT (stmt_info))
5649 ncopies = 1;
5650 else
5651 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5652
5653 gcc_assert (ncopies >= 1);
5654
5655 /* FORNOW. This restriction should be relaxed. */
5656 if (nested_in_vect_loop && ncopies > 1)
5657 {
5658 if (dump_enabled_p ())
5659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5660 "multiple types in nested loop.\n");
5661 return false;
5662 }
5663
5664 /* Invalidate assumptions made by dependence analysis when vectorization
5665 on the unrolled body effectively re-orders stmts. */
5666 if (ncopies > 1
5667 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5668 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5669 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5670 {
5671 if (dump_enabled_p ())
5672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5673 "cannot perform implicit CSE when unrolling "
5674 "with negative dependence distance\n");
5675 return false;
5676 }
5677
5678 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5679 return false;
5680
5681 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5682 return false;
5683
5684 /* Is vectorizable load? */
5685 if (!is_gimple_assign (stmt))
5686 return false;
5687
5688 scalar_dest = gimple_assign_lhs (stmt);
5689 if (TREE_CODE (scalar_dest) != SSA_NAME)
5690 return false;
5691
5692 code = gimple_assign_rhs_code (stmt);
5693 if (code != ARRAY_REF
5694 && code != BIT_FIELD_REF
5695 && code != INDIRECT_REF
5696 && code != COMPONENT_REF
5697 && code != IMAGPART_EXPR
5698 && code != REALPART_EXPR
5699 && code != MEM_REF
5700 && TREE_CODE_CLASS (code) != tcc_declaration)
5701 return false;
5702
5703 if (!STMT_VINFO_DATA_REF (stmt_info))
5704 return false;
5705
5706 elem_type = TREE_TYPE (vectype);
5707 mode = TYPE_MODE (vectype);
5708
5709 /* FORNOW. In some cases we can vectorize even if the data-type is not supported
5710 (e.g. - data copies). */
5711 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5712 {
5713 if (dump_enabled_p ())
5714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5715 "Aligned load, but unsupported type.\n");
5716 return false;
5717 }
5718
5719 /* Check if the load is a part of an interleaving chain. */
5720 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5721 {
5722 grouped_load = true;
5723 /* FORNOW */
5724 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5725
5726 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5727 if (!slp && !PURE_SLP_STMT (stmt_info))
5728 {
5729 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5730 if (vect_load_lanes_supported (vectype, group_size))
5731 load_lanes_p = true;
5732 else if (!vect_grouped_load_supported (vectype, group_size))
5733 return false;
5734 }
5735
5736 /* Invalidate assumptions made by dependence analysis when vectorization
5737 on the unrolled body effectively re-orders stmts. */
5738 if (!PURE_SLP_STMT (stmt_info)
5739 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5740 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5741 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5742 {
5743 if (dump_enabled_p ())
5744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5745 "cannot perform implicit CSE when performing "
5746 "group loads with negative dependence distance\n");
5747 return false;
5748 }
5749 }
5750
5751
5752 if (STMT_VINFO_GATHER_P (stmt_info))
5753 {
5754 gimple def_stmt;
5755 tree def;
5756 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5757 &gather_off, &gather_scale);
5758 gcc_assert (gather_decl);
5759 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5760 &def_stmt, &def, &gather_dt,
5761 &gather_off_vectype))
5762 {
5763 if (dump_enabled_p ())
5764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5765 "gather index use not simple.\n");
5766 return false;
5767 }
5768 }
5769 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5770 ;
5771 else
5772 {
5773 negative = tree_int_cst_compare (nested_in_vect_loop
5774 ? STMT_VINFO_DR_STEP (stmt_info)
5775 : DR_STEP (dr),
5776 size_zero_node) < 0;
5777 if (negative && ncopies > 1)
5778 {
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5781 "multiple types with negative step.\n");
5782 return false;
5783 }
5784
5785 if (negative)
5786 {
5787 if (grouped_load)
5788 {
5789 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5791 "negative step for group load not supported"
5792 "\n");
5793 return false;
5794 }
5795 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5796 if (alignment_support_scheme != dr_aligned
5797 && alignment_support_scheme != dr_unaligned_supported)
5798 {
5799 if (dump_enabled_p ())
5800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5801 "negative step but alignment required.\n");
5802 return false;
5803 }
5804 if (!perm_mask_for_reverse (vectype))
5805 {
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5808 "negative step and reversing not supported."
5809 "\n");
5810 return false;
5811 }
5812 }
5813 }
5814
5815 if (!vec_stmt) /* transformation not required. */
5816 {
5817 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5818 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5819 return true;
5820 }
5821
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_NOTE, vect_location,
5824 "transform load. ncopies = %d\n", ncopies);
5825
5826 /** Transform. **/
5827
5828 ensure_base_align (stmt_info, dr);
5829
5830 if (STMT_VINFO_GATHER_P (stmt_info))
5831 {
5832 tree vec_oprnd0 = NULL_TREE, op;
5833 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5834 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5835 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5836 edge pe = loop_preheader_edge (loop);
5837 gimple_seq seq;
5838 basic_block new_bb;
5839 enum { NARROW, NONE, WIDEN } modifier;
5840 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5841
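/* The gather offset vector and the data vector need not have the same
number of lanes.  NONE: they agree.  WIDEN: the offset vector has
twice as many lanes, so each copy consumes one half of it (the upper
half is selected via PERM_MASK on odd copies).  NARROW: the data
vector has twice as many lanes, so two gather results are combined
with a permute and NCOPIES is doubled.  */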
5842 if (nunits == gather_off_nunits)
5843 modifier = NONE;
5844 else if (nunits == gather_off_nunits / 2)
5845 {
5846 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5847 modifier = WIDEN;
5848
5849 for (i = 0; i < gather_off_nunits; ++i)
5850 sel[i] = i | nunits;
5851
5852 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5853 gcc_assert (perm_mask != NULL_TREE);
5854 }
5855 else if (nunits == gather_off_nunits * 2)
5856 {
5857 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5858 modifier = NARROW;
5859
5860 for (i = 0; i < nunits; ++i)
5861 sel[i] = i < gather_off_nunits
5862 ? i : i + nunits - gather_off_nunits;
5863
5864 perm_mask = vect_gen_perm_mask (vectype, sel);
5865 gcc_assert (perm_mask != NULL_TREE);
5866 ncopies *= 2;
5867 }
5868 else
5869 gcc_unreachable ();
5870
5871 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5872 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5873 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5874 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5875 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5876 scaletype = TREE_VALUE (arglist);
5877 gcc_checking_assert (types_compatible_p (srctype, rettype));
5878
5879 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5880
5881 ptr = fold_convert (ptrtype, gather_base);
5882 if (!is_gimple_min_invariant (ptr))
5883 {
5884 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5885 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5886 gcc_assert (!new_bb);
5887 }
5888
5889 /* Currently we support only unconditional gather loads,
5890 so mask should be all ones. */
5891 if (TREE_CODE (masktype) == INTEGER_TYPE)
5892 mask = build_int_cst (masktype, -1);
5893 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5894 {
5895 mask = build_int_cst (TREE_TYPE (masktype), -1);
5896 mask = build_vector_from_val (masktype, mask);
5897 mask = vect_init_vector (stmt, mask, masktype, NULL);
5898 }
5899 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5900 {
5901 REAL_VALUE_TYPE r;
5902 long tmp[6];
5903 for (j = 0; j < 6; ++j)
5904 tmp[j] = -1;
5905 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5906 mask = build_real (TREE_TYPE (masktype), r);
5907 mask = build_vector_from_val (masktype, mask);
5908 mask = vect_init_vector (stmt, mask, masktype, NULL);
5909 }
5910 else
5911 gcc_unreachable ();
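/* In all of the cases above the mask ends up with an all-ones bit
pattern: integer mask types use -1 directly, while floating-point
mask types reinterpret the all-ones image via real_from_target, so
every lane of the gather is enabled.  */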
5912
5913 scale = build_int_cst (scaletype, gather_scale);
5914
5915 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5916 merge = build_int_cst (TREE_TYPE (rettype), 0);
5917 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5918 {
5919 REAL_VALUE_TYPE r;
5920 long tmp[6];
5921 for (j = 0; j < 6; ++j)
5922 tmp[j] = 0;
5923 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5924 merge = build_real (TREE_TYPE (rettype), r);
5925 }
5926 else
5927 gcc_unreachable ();
5928 merge = build_vector_from_val (rettype, merge);
5929 merge = vect_init_vector (stmt, merge, rettype, NULL);
5930
5931 prev_stmt_info = NULL;
5932 for (j = 0; j < ncopies; ++j)
5933 {
5934 if (modifier == WIDEN && (j & 1))
5935 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5936 perm_mask, stmt, gsi);
5937 else if (j == 0)
5938 op = vec_oprnd0
5939 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5940 else
5941 op = vec_oprnd0
5942 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5943
5944 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5945 {
5946 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5947 == TYPE_VECTOR_SUBPARTS (idxtype));
5948 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5949 var = make_ssa_name (var, NULL);
5950 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5951 new_stmt
5952 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5953 op, NULL_TREE);
5954 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5955 op = var;
5956 }
5957
5958 new_stmt
5959 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5960
5961 if (!useless_type_conversion_p (vectype, rettype))
5962 {
5963 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5964 == TYPE_VECTOR_SUBPARTS (rettype));
5965 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5966 op = make_ssa_name (var, new_stmt);
5967 gimple_call_set_lhs (new_stmt, op);
5968 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5969 var = make_ssa_name (vec_dest, NULL);
5970 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5971 new_stmt
5972 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5973 NULL_TREE);
5974 }
5975 else
5976 {
5977 var = make_ssa_name (vec_dest, new_stmt);
5978 gimple_call_set_lhs (new_stmt, var);
5979 }
5980
5981 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5982
5983 if (modifier == NARROW)
5984 {
5985 if ((j & 1) == 0)
5986 {
5987 prev_res = var;
5988 continue;
5989 }
5990 var = permute_vec_elements (prev_res, var,
5991 perm_mask, stmt, gsi);
5992 new_stmt = SSA_NAME_DEF_STMT (var);
5993 }
5994
5995 if (prev_stmt_info == NULL)
5996 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5997 else
5998 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5999 prev_stmt_info = vinfo_for_stmt (new_stmt);
6000 }
6001 return true;
6002 }
6003 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6004 {
6005 gimple_stmt_iterator incr_gsi;
6006 bool insert_after;
6007 gimple incr;
6008 tree offvar;
6009 tree ivstep;
6010 tree running_off;
6011 vec<constructor_elt, va_gc> *v = NULL;
6012 gimple_seq stmts = NULL;
6013 tree stride_base, stride_step, alias_off;
6014
6015 gcc_assert (!nested_in_vect_loop);
6016
6017 stride_base
6018 = fold_build_pointer_plus
6019 (unshare_expr (DR_BASE_ADDRESS (dr)),
6020 size_binop (PLUS_EXPR,
6021 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6022 convert_to_ptrofftype (DR_INIT (dr))));
6023 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6024
6025 /* For a load with loop-invariant (but other than power-of-2)
6026 stride (i.e. not a grouped access) like so:
6027
6028 for (i = 0; i < n; i += stride)
6029 ... = array[i];
6030
6031 we generate a new induction variable and new accesses to
6032 form a new vector (or vectors, depending on ncopies):
6033
6034 for (j = 0; ; j += VF*stride)
6035 tmp1 = array[j];
6036 tmp2 = array[j + stride];
6037 ...
6038 vectemp = {tmp1, tmp2, ...}
6039 */
6040
6041 ivstep = stride_step;
6042 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6043 build_int_cst (TREE_TYPE (ivstep), vf));
6044
6045 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6046
6047 create_iv (stride_base, ivstep, NULL,
6048 loop, &incr_gsi, insert_after,
6049 &offvar, NULL);
6050 incr = gsi_stmt (incr_gsi);
6051 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6052
6053 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6054 if (stmts)
6055 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6056
6057 prev_stmt_info = NULL;
6058 running_off = offvar;
6059 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6060 for (j = 0; j < ncopies; j++)
6061 {
6062 tree vec_inv;
6063
6064 vec_alloc (v, nunits);
6065 for (i = 0; i < nunits; i++)
6066 {
6067 tree newref, newoff;
6068 gimple incr;
6069 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6070 running_off, alias_off);
6071
6072 newref = force_gimple_operand_gsi (gsi, newref, true,
6073 NULL_TREE, true,
6074 GSI_SAME_STMT);
6075 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6076 newoff = copy_ssa_name (running_off, NULL);
6077 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6078 running_off, stride_step);
6079 vect_finish_stmt_generation (stmt, incr, gsi);
6080
6081 running_off = newoff;
6082 }
6083
6084 vec_inv = build_constructor (vectype, v);
6085 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6086 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6087
6088 if (j == 0)
6089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6090 else
6091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6092 prev_stmt_info = vinfo_for_stmt (new_stmt);
6093 }
6094 return true;
6095 }
6096
6097 if (grouped_load)
6098 {
6099 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6100 if (slp
6101 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6102 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6103 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6104
6105 /* Check if the chain of loads is already vectorized. */
6106 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6107 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6108 ??? But we can only do so if there is exactly one
6109 as we have no way to get at the rest. Leave the CSE
6110 opportunity alone.
6111 ??? With the group load eventually participating
6112 in multiple different permutations (having multiple
6113 slp nodes which refer to the same group) the CSE
6114 is even wrong code. See PR56270. */
6115 && !slp)
6116 {
6117 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6118 return true;
6119 }
6120 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6121 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6122
6123 /* VEC_NUM is the number of vect stmts to be created for this group. */
6124 if (slp)
6125 {
6126 grouped_load = false;
6127 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6128 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6129 slp_perm = true;
6130 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6131 }
6132 else
6133 {
6134 vec_num = group_size;
6135 group_gap = 0;
6136 }
6137 }
6138 else
6139 {
6140 first_stmt = stmt;
6141 first_dr = dr;
6142 group_size = vec_num = 1;
6143 group_gap = 0;
6144 }
6145
6146 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6147 gcc_assert (alignment_support_scheme);
6148 /* Targets with load-lane instructions must not require explicit
6149 realignment. */
6150 gcc_assert (!load_lanes_p
6151 || alignment_support_scheme == dr_aligned
6152 || alignment_support_scheme == dr_unaligned_supported);
6153
6154 /* In case the vectorization factor (VF) is bigger than the number
6155 of elements that we can fit in a vectype (nunits), we have to generate
6156 more than one vector stmt - i.e - we need to "unroll" the
6157 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6158 from one copy of the vector stmt to the next, in the field
6159 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6160 stages to find the correct vector defs to be used when vectorizing
6161 stmts that use the defs of the current stmt. The example below
6162 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6163 need to create 4 vectorized stmts):
6164
6165 before vectorization:
6166 RELATED_STMT VEC_STMT
6167 S1: x = memref - -
6168 S2: z = x + 1 - -
6169
6170 step 1: vectorize stmt S1:
6171 We first create the vector stmt VS1_0, and, as usual, record a
6172 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6173 Next, we create the vector stmt VS1_1, and record a pointer to
6174 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6175 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6176 stmts and pointers:
6177 RELATED_STMT VEC_STMT
6178 VS1_0: vx0 = memref0 VS1_1 -
6179 VS1_1: vx1 = memref1 VS1_2 -
6180 VS1_2: vx2 = memref2 VS1_3 -
6181 VS1_3: vx3 = memref3 - -
6182 S1: x = load - VS1_0
6183 S2: z = x + 1 - -
6184
6185 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6186 information we recorded in the RELATED_STMT field is used to vectorize
6187 stmt S2. */
6188
6189 /* In case of interleaving (non-unit grouped access):
6190
6191 S1: x2 = &base + 2
6192 S2: x0 = &base
6193 S3: x1 = &base + 1
6194 S4: x3 = &base + 3
6195
6196 Vectorized loads are created in the order of memory accesses
6197 starting from the access of the first stmt of the chain:
6198
6199 VS1: vx0 = &base
6200 VS2: vx1 = &base + vec_size*1
6201 VS3: vx3 = &base + vec_size*2
6202 VS4: vx4 = &base + vec_size*3
6203
6204 Then permutation statements are generated:
6205
6206 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6207 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6208 ...
6209
6210 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6211 (the order of the data-refs in the output of vect_permute_load_chain
6212 corresponds to the order of scalar stmts in the interleaving chain - see
6213 the documentation of vect_permute_load_chain()).
6214 The generation of permutation stmts and recording them in
6215 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6216
6217 In case of both multiple types and interleaving, the vector loads and
6218 permutation stmts above are created for every copy. The result vector
6219 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6220 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6221
6222 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6223 on a target that supports unaligned accesses (dr_unaligned_supported)
6224 we generate the following code:
6225 p = initial_addr;
6226 indx = 0;
6227 loop {
6228 p = p + indx * vectype_size;
6229 vec_dest = *(p);
6230 indx = indx + 1;
6231 }
6232
6233 Otherwise, the data reference is potentially unaligned on a target that
6234 does not support unaligned accesses (dr_explicit_realign_optimized) -
6235 then generate the following code, in which the data in each iteration is
6236 obtained by two vector loads, one from the previous iteration, and one
6237 from the current iteration:
6238 p1 = initial_addr;
6239 msq_init = *(floor(p1))
6240 p2 = initial_addr + VS - 1;
6241 realignment_token = call target_builtin;
6242 indx = 0;
6243 loop {
6244 p2 = p2 + indx * vectype_size
6245 lsq = *(floor(p2))
6246 vec_dest = realign_load (msq, lsq, realignment_token)
6247 indx = indx + 1;
6248 msq = lsq;
6249 } */
6250
6251 /* If the misalignment remains the same throughout the execution of the
6252 loop, we can create the init_addr and permutation mask at the loop
6253 preheader. Otherwise, it needs to be created inside the loop.
6254 This can only occur when vectorizing memory accesses in the inner-loop
6255 nested within an outer-loop that is being vectorized. */
6256
6257 if (nested_in_vect_loop
6258 && (TREE_INT_CST_LOW (DR_STEP (dr))
6259 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6260 {
6261 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6262 compute_in_loop = true;
6263 }
6264
6265 if ((alignment_support_scheme == dr_explicit_realign_optimized
6266 || alignment_support_scheme == dr_explicit_realign)
6267 && !compute_in_loop)
6268 {
6269 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6270 alignment_support_scheme, NULL_TREE,
6271 &at_loop);
6272 if (alignment_support_scheme == dr_explicit_realign_optimized)
6273 {
6274 phi = SSA_NAME_DEF_STMT (msq);
6275 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6276 size_one_node);
6277 }
6278 }
6279 else
6280 at_loop = loop;
6281
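/* For a negative step, bias the data reference pointer backwards by
NUNITS - 1 elements so that the vector load ends at the scalar
address; the loaded vector is reversed with a permute further below.  */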
6282 if (negative)
6283 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6284
6285 if (load_lanes_p)
6286 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6287 else
6288 aggr_type = vectype;
6289
6290 prev_stmt_info = NULL;
6291 for (j = 0; j < ncopies; j++)
6292 {
6293 /* 1. Create the vector or array pointer update chain. */
6294 if (j == 0)
6295 {
6296 bool simd_lane_access_p
6297 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6298 if (simd_lane_access_p
6299 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6300 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6301 && integer_zerop (DR_OFFSET (first_dr))
6302 && integer_zerop (DR_INIT (first_dr))
6303 && alias_sets_conflict_p (get_alias_set (aggr_type),
6304 get_alias_set (DR_REF (first_dr)))
6305 && (alignment_support_scheme == dr_aligned
6306 || alignment_support_scheme == dr_unaligned_supported))
6307 {
6308 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6309 dataref_offset = build_int_cst (reference_alias_ptr_type
6310 (DR_REF (first_dr)), 0);
6311 inv_p = false;
6312 }
6313 else
6314 dataref_ptr
6315 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6316 offset, &dummy, gsi, &ptr_incr,
6317 simd_lane_access_p, &inv_p,
6318 byte_offset);
6319 }
6320 else if (dataref_offset)
6321 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6322 TYPE_SIZE_UNIT (aggr_type));
6323 else
6324 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6325 TYPE_SIZE_UNIT (aggr_type));
6326
6327 if (grouped_load || slp_perm)
6328 dr_chain.create (vec_num);
6329
6330 if (load_lanes_p)
6331 {
6332 tree vec_array;
6333
6334 vec_array = create_vector_array (vectype, vec_num);
6335
6336 /* Emit:
6337 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6338 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6339 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6340 gimple_call_set_lhs (new_stmt, vec_array);
6341 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6342
6343 /* Extract each vector into an SSA_NAME. */
6344 for (i = 0; i < vec_num; i++)
6345 {
6346 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6347 vec_array, i);
6348 dr_chain.quick_push (new_temp);
6349 }
6350
6351 /* Record the mapping between SSA_NAMEs and statements. */
6352 vect_record_grouped_load_vectors (stmt, dr_chain);
6353 }
6354 else
6355 {
6356 for (i = 0; i < vec_num; i++)
6357 {
6358 if (i > 0)
6359 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6360 stmt, NULL_TREE);
6361
6362 /* 2. Create the vector-load in the loop. */
6363 switch (alignment_support_scheme)
6364 {
6365 case dr_aligned:
6366 case dr_unaligned_supported:
6367 {
6368 unsigned int align, misalign;
6369
6370 data_ref
6371 = build2 (MEM_REF, vectype, dataref_ptr,
6372 dataref_offset
6373 ? dataref_offset
6374 : build_int_cst (reference_alias_ptr_type
6375 (DR_REF (first_dr)), 0));
6376 align = TYPE_ALIGN_UNIT (vectype);
6377 if (alignment_support_scheme == dr_aligned)
6378 {
6379 gcc_assert (aligned_access_p (first_dr));
6380 misalign = 0;
6381 }
6382 else if (DR_MISALIGNMENT (first_dr) == -1)
6383 {
6384 TREE_TYPE (data_ref)
6385 = build_aligned_type (TREE_TYPE (data_ref),
6386 TYPE_ALIGN (elem_type));
6387 align = TYPE_ALIGN_UNIT (elem_type);
6388 misalign = 0;
6389 }
6390 else
6391 {
6392 TREE_TYPE (data_ref)
6393 = build_aligned_type (TREE_TYPE (data_ref),
6394 TYPE_ALIGN (elem_type));
6395 misalign = DR_MISALIGNMENT (first_dr);
6396 }
6397 if (dataref_offset == NULL_TREE)
6398 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6399 align, misalign);
6400 break;
6401 }
6402 case dr_explicit_realign:
6403 {
6404 tree ptr, bump;
6405 tree vs_minus_1;
6406
6407 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6408
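/* dr_explicit_realign: the vector is assembled from two loads at
addresses rounded down to the vector alignment -- one at DATAREF_PTR
and one at DATAREF_PTR + VS - 1 -- which are later combined with a
REALIGN_LOAD_EXPR.  */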
6409 if (compute_in_loop)
6410 msq = vect_setup_realignment (first_stmt, gsi,
6411 &realignment_token,
6412 dr_explicit_realign,
6413 dataref_ptr, NULL);
6414
6415 ptr = copy_ssa_name (dataref_ptr, NULL);
6416 new_stmt = gimple_build_assign_with_ops
6417 (BIT_AND_EXPR, ptr, dataref_ptr,
6418 build_int_cst
6419 (TREE_TYPE (dataref_ptr),
6420 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6421 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6422 data_ref
6423 = build2 (MEM_REF, vectype, ptr,
6424 build_int_cst (reference_alias_ptr_type
6425 (DR_REF (first_dr)), 0));
6426 vec_dest = vect_create_destination_var (scalar_dest,
6427 vectype);
6428 new_stmt = gimple_build_assign (vec_dest, data_ref);
6429 new_temp = make_ssa_name (vec_dest, new_stmt);
6430 gimple_assign_set_lhs (new_stmt, new_temp);
6431 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6432 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6433 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6434 msq = new_temp;
6435
6436 bump = size_binop (MULT_EXPR, vs_minus_1,
6437 TYPE_SIZE_UNIT (elem_type));
6438 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6439 new_stmt = gimple_build_assign_with_ops
6440 (BIT_AND_EXPR, NULL_TREE, ptr,
6441 build_int_cst
6442 (TREE_TYPE (ptr),
6443 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6444 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6445 gimple_assign_set_lhs (new_stmt, ptr);
6446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6447 data_ref
6448 = build2 (MEM_REF, vectype, ptr,
6449 build_int_cst (reference_alias_ptr_type
6450 (DR_REF (first_dr)), 0));
6451 break;
6452 }
6453 case dr_explicit_realign_optimized:
6454 new_temp = copy_ssa_name (dataref_ptr, NULL);
6455 new_stmt = gimple_build_assign_with_ops
6456 (BIT_AND_EXPR, new_temp, dataref_ptr,
6457 build_int_cst
6458 (TREE_TYPE (dataref_ptr),
6459 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6460 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6461 data_ref
6462 = build2 (MEM_REF, vectype, new_temp,
6463 build_int_cst (reference_alias_ptr_type
6464 (DR_REF (first_dr)), 0));
6465 break;
6466 default:
6467 gcc_unreachable ();
6468 }
6469 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6470 new_stmt = gimple_build_assign (vec_dest, data_ref);
6471 new_temp = make_ssa_name (vec_dest, new_stmt);
6472 gimple_assign_set_lhs (new_stmt, new_temp);
6473 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6474
6475 /* 3. Handle explicit realignment if necessary/supported.
6476 Create in loop:
6477 vec_dest = realign_load (msq, lsq, realignment_token) */
6478 if (alignment_support_scheme == dr_explicit_realign_optimized
6479 || alignment_support_scheme == dr_explicit_realign)
6480 {
6481 lsq = gimple_assign_lhs (new_stmt);
6482 if (!realignment_token)
6483 realignment_token = dataref_ptr;
6484 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6485 new_stmt
6486 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6487 vec_dest, msq, lsq,
6488 realignment_token);
6489 new_temp = make_ssa_name (vec_dest, new_stmt);
6490 gimple_assign_set_lhs (new_stmt, new_temp);
6491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6492
6493 if (alignment_support_scheme == dr_explicit_realign_optimized)
6494 {
6495 gcc_assert (phi);
6496 if (i == vec_num - 1 && j == ncopies - 1)
6497 add_phi_arg (phi, lsq,
6498 loop_latch_edge (containing_loop),
6499 UNKNOWN_LOCATION);
6500 msq = lsq;
6501 }
6502 }
6503
6504 /* 4. Handle invariant-load. */
6505 if (inv_p && !bb_vinfo)
6506 {
6507 gcc_assert (!grouped_load);
6508 /* If we have versioned for aliasing or the loop doesn't
6509 have any data dependencies that would preclude this,
6510 then we are sure this is a loop invariant load and
6511 thus we can insert it on the preheader edge. */
6512 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6513 && !nested_in_vect_loop
6514 && hoist_defs_of_uses (stmt, loop))
6515 {
6516 if (dump_enabled_p ())
6517 {
6518 dump_printf_loc (MSG_NOTE, vect_location,
6519 "hoisting out of the vectorized "
6520 "loop: ");
6521 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6522 dump_printf (MSG_NOTE, "\n");
6523 }
6524 tree tem = copy_ssa_name (scalar_dest, NULL);
6525 gsi_insert_on_edge_immediate
6526 (loop_preheader_edge (loop),
6527 gimple_build_assign (tem,
6528 unshare_expr
6529 (gimple_assign_rhs1 (stmt))));
6530 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6531 }
6532 else
6533 {
6534 gimple_stmt_iterator gsi2 = *gsi;
6535 gsi_next (&gsi2);
6536 new_temp = vect_init_vector (stmt, scalar_dest,
6537 vectype, &gsi2);
6538 }
6539 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6540 set_vinfo_for_stmt (new_stmt,
6541 new_stmt_vec_info (new_stmt, loop_vinfo,
6542 bb_vinfo));
6543 }
6544
6545 if (negative)
6546 {
6547 tree perm_mask = perm_mask_for_reverse (vectype);
6548 new_temp = permute_vec_elements (new_temp, new_temp,
6549 perm_mask, stmt, gsi);
6550 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6551 }
6552
6553 /* Collect vector loads and later create their permutation in
6554 vect_transform_grouped_load (). */
6555 if (grouped_load || slp_perm)
6556 dr_chain.quick_push (new_temp);
6557
6558 /* Store vector loads in the corresponding SLP_NODE. */
6559 if (slp && !slp_perm)
6560 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6561 }
6562 /* Bump the vector pointer to account for a gap. */
6563 if (slp && group_gap != 0)
6564 {
6565 tree bump = size_binop (MULT_EXPR,
6566 TYPE_SIZE_UNIT (elem_type),
6567 size_int (group_gap));
6568 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6569 stmt, bump);
6570 }
6571 }
6572
6573 if (slp && !slp_perm)
6574 continue;
6575
6576 if (slp_perm)
6577 {
6578 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6579 slp_node_instance, false))
6580 {
6581 dr_chain.release ();
6582 return false;
6583 }
6584 }
6585 else
6586 {
6587 if (grouped_load)
6588 {
6589 if (!load_lanes_p)
6590 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6591 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6592 }
6593 else
6594 {
6595 if (j == 0)
6596 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6597 else
6598 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6599 prev_stmt_info = vinfo_for_stmt (new_stmt);
6600 }
6601 }
6602 dr_chain.release ();
6603 }
6604
6605 return true;
6606 }
6607
6608 /* Function vect_is_simple_cond.
6609
6610 Input:
6611 LOOP - the loop that is being vectorized.
6612 COND - Condition that is checked for simple use.
6613
6614 Output:
6615 *COMP_VECTYPE - the vector type for the comparison.
6616
6617 Returns whether a COND can be vectorized. Checks whether
6618 condition operands are supportable using vect_is_simple_use. */
6619
6620 static bool
6621 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6622 bb_vec_info bb_vinfo, tree *comp_vectype)
6623 {
6624 tree lhs, rhs;
6625 tree def;
6626 enum vect_def_type dt;
6627 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6628
6629 if (!COMPARISON_CLASS_P (cond))
6630 return false;
6631
6632 lhs = TREE_OPERAND (cond, 0);
6633 rhs = TREE_OPERAND (cond, 1);
6634
6635 if (TREE_CODE (lhs) == SSA_NAME)
6636 {
6637 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6638 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6639 &lhs_def_stmt, &def, &dt, &vectype1))
6640 return false;
6641 }
6642 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6643 && TREE_CODE (lhs) != FIXED_CST)
6644 return false;
6645
6646 if (TREE_CODE (rhs) == SSA_NAME)
6647 {
6648 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6649 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6650 &rhs_def_stmt, &def, &dt, &vectype2))
6651 return false;
6652 }
6653 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6654 && TREE_CODE (rhs) != FIXED_CST)
6655 return false;
6656
6657 *comp_vectype = vectype1 ? vectype1 : vectype2;
6658 return true;
6659 }
6660
6661 /* vectorizable_condition.
6662
6663 Check if STMT is a conditional modify expression that can be vectorized.
6664 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6665 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6666 at GSI.
6667
6668 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6669 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6670 the else clause if it is 2).
6671
6672 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
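/* As an illustrative example (the vector names are hypothetical), a
scalar statement  x = a < b ? c : d  becomes

vx = VEC_COND_EXPR <va < vb, vc, vd>;

with va, vb, vc and vd the vectorized defs of the corresponding
operands.  */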
6673
6674 bool
6675 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6676 gimple *vec_stmt, tree reduc_def, int reduc_index,
6677 slp_tree slp_node)
6678 {
6679 tree scalar_dest = NULL_TREE;
6680 tree vec_dest = NULL_TREE;
6681 tree cond_expr, then_clause, else_clause;
6682 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6683 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6684 tree comp_vectype = NULL_TREE;
6685 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6686 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6687 tree vec_compare, vec_cond_expr;
6688 tree new_temp;
6689 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6690 tree def;
6691 enum vect_def_type dt, dts[4];
6692 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6693 int ncopies;
6694 enum tree_code code;
6695 stmt_vec_info prev_stmt_info = NULL;
6696 int i, j;
6697 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6698 vec<tree> vec_oprnds0 = vNULL;
6699 vec<tree> vec_oprnds1 = vNULL;
6700 vec<tree> vec_oprnds2 = vNULL;
6701 vec<tree> vec_oprnds3 = vNULL;
6702 tree vec_cmp_type;
6703
6704 if (slp_node || PURE_SLP_STMT (stmt_info))
6705 ncopies = 1;
6706 else
6707 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6708
6709 gcc_assert (ncopies >= 1);
6710 if (reduc_index && ncopies > 1)
6711 return false; /* FORNOW */
6712
6713 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6714 return false;
6715
6716 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6717 return false;
6718
6719 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6720 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6721 && reduc_def))
6722 return false;
6723
6724 /* FORNOW: not yet supported. */
6725 if (STMT_VINFO_LIVE_P (stmt_info))
6726 {
6727 if (dump_enabled_p ())
6728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6729 "value used after loop.\n");
6730 return false;
6731 }
6732
6733 /* Is vectorizable conditional operation? */
6734 if (!is_gimple_assign (stmt))
6735 return false;
6736
6737 code = gimple_assign_rhs_code (stmt);
6738
6739 if (code != COND_EXPR)
6740 return false;
6741
6742 cond_expr = gimple_assign_rhs1 (stmt);
6743 then_clause = gimple_assign_rhs2 (stmt);
6744 else_clause = gimple_assign_rhs3 (stmt);
6745
6746 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6747 &comp_vectype)
6748 || !comp_vectype)
6749 return false;
6750
6751 if (TREE_CODE (then_clause) == SSA_NAME)
6752 {
6753 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6754 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6755 &then_def_stmt, &def, &dt))
6756 return false;
6757 }
6758 else if (TREE_CODE (then_clause) != INTEGER_CST
6759 && TREE_CODE (then_clause) != REAL_CST
6760 && TREE_CODE (then_clause) != FIXED_CST)
6761 return false;
6762
6763 if (TREE_CODE (else_clause) == SSA_NAME)
6764 {
6765 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6766 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6767 &else_def_stmt, &def, &dt))
6768 return false;
6769 }
6770 else if (TREE_CODE (else_clause) != INTEGER_CST
6771 && TREE_CODE (else_clause) != REAL_CST
6772 && TREE_CODE (else_clause) != FIXED_CST)
6773 return false;
6774
6775 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6776 /* The result of a vector comparison should be of signed integer type. */
6777 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6778 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6779 if (vec_cmp_type == NULL_TREE)
6780 return false;
6781
6782 if (!vec_stmt)
6783 {
6784 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6785 return expand_vec_cond_expr_p (vectype, comp_vectype);
6786 }
6787
6788 /* Transform. */
6789
6790 if (!slp_node)
6791 {
6792 vec_oprnds0.create (1);
6793 vec_oprnds1.create (1);
6794 vec_oprnds2.create (1);
6795 vec_oprnds3.create (1);
6796 }
6797
6798 /* Handle def. */
6799 scalar_dest = gimple_assign_lhs (stmt);
6800 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6801
6802 /* Handle cond expr. */
6803 for (j = 0; j < ncopies; j++)
6804 {
6805 gimple new_stmt = NULL;
6806 if (j == 0)
6807 {
6808 if (slp_node)
6809 {
6810 auto_vec<tree, 4> ops;
6811 auto_vec<vec<tree>, 4> vec_defs;
6812
6813 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6814 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6815 ops.safe_push (then_clause);
6816 ops.safe_push (else_clause);
6817 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6818 vec_oprnds3 = vec_defs.pop ();
6819 vec_oprnds2 = vec_defs.pop ();
6820 vec_oprnds1 = vec_defs.pop ();
6821 vec_oprnds0 = vec_defs.pop ();
6822
6823 ops.release ();
6824 vec_defs.release ();
6825 }
6826 else
6827 {
6828 gimple gtemp;
6829 vec_cond_lhs =
6830 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6831 stmt, NULL);
6832 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6833 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6834
6835 vec_cond_rhs =
6836 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6837 stmt, NULL);
6838 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6839 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6840 if (reduc_index == 1)
6841 vec_then_clause = reduc_def;
6842 else
6843 {
6844 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6845 stmt, NULL);
6846 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6847 NULL, &gtemp, &def, &dts[2]);
6848 }
6849 if (reduc_index == 2)
6850 vec_else_clause = reduc_def;
6851 else
6852 {
6853 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6854 stmt, NULL);
6855 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6856 NULL, &gtemp, &def, &dts[3]);
6857 }
6858 }
6859 }
6860 else
6861 {
6862 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6863 vec_oprnds0.pop ());
6864 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6865 vec_oprnds1.pop ());
6866 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6867 vec_oprnds2.pop ());
6868 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6869 vec_oprnds3.pop ());
6870 }
6871
6872 if (!slp_node)
6873 {
6874 vec_oprnds0.quick_push (vec_cond_lhs);
6875 vec_oprnds1.quick_push (vec_cond_rhs);
6876 vec_oprnds2.quick_push (vec_then_clause);
6877 vec_oprnds3.quick_push (vec_else_clause);
6878 }
6879
6880 /* Arguments are ready. Create the new vector stmt. */
6881 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6882 {
6883 vec_cond_rhs = vec_oprnds1[i];
6884 vec_then_clause = vec_oprnds2[i];
6885 vec_else_clause = vec_oprnds3[i];
6886
6887 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6888 vec_cond_lhs, vec_cond_rhs);
6889 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6890 vec_compare, vec_then_clause, vec_else_clause);
6891
6892 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6893 new_temp = make_ssa_name (vec_dest, new_stmt);
6894 gimple_assign_set_lhs (new_stmt, new_temp);
6895 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6896 if (slp_node)
6897 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6898 }
6899
6900 if (slp_node)
6901 continue;
6902
6903 if (j == 0)
6904 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6905 else
6906 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6907
6908 prev_stmt_info = vinfo_for_stmt (new_stmt);
6909 }
6910
6911 vec_oprnds0.release ();
6912 vec_oprnds1.release ();
6913 vec_oprnds2.release ();
6914 vec_oprnds3.release ();
6915
6916 return true;
6917 }
6918
6919
6920 /* Make sure the statement is vectorizable. */
6921
6922 bool
6923 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6924 {
6925 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6926 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6927 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6928 bool ok;
6929 tree scalar_type, vectype;
6930 gimple pattern_stmt;
6931 gimple_seq pattern_def_seq;
6932
6933 if (dump_enabled_p ())
6934 {
6935 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6936 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6937 dump_printf (MSG_NOTE, "\n");
6938 }
6939
6940 if (gimple_has_volatile_ops (stmt))
6941 {
6942 if (dump_enabled_p ())
6943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6944 "not vectorized: stmt has volatile operands\n");
6945
6946 return false;
6947 }
6948
6949 /* Skip stmts that do not need to be vectorized. In loops this is expected
6950 to include:
6951 - the COND_EXPR which is the loop exit condition
6952 - any LABEL_EXPRs in the loop
6953 - computations that are used only for array indexing or loop control.
6954 In basic blocks we only analyze statements that are a part of some SLP
6955 instance, therefore, all the statements are relevant.
6956
6957 A pattern statement needs to be analyzed instead of the original statement
6958 if the original statement is not relevant.  Otherwise, we analyze both
6959 statements.  In basic blocks we are called from some SLP instance
6960 traversal; don't analyze pattern stmts instead of the original ones there,
6961 since the pattern stmts will already be part of the SLP instance. */
6962
6963 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6964 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6965 && !STMT_VINFO_LIVE_P (stmt_info))
6966 {
6967 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6968 && pattern_stmt
6969 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6970 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6971 {
6972 /* Analyze PATTERN_STMT instead of the original stmt. */
6973 stmt = pattern_stmt;
6974 stmt_info = vinfo_for_stmt (pattern_stmt);
6975 if (dump_enabled_p ())
6976 {
6977 dump_printf_loc (MSG_NOTE, vect_location,
6978 "==> examining pattern statement: ");
6979 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6980 dump_printf (MSG_NOTE, "\n");
6981 }
6982 }
6983 else
6984 {
6985 if (dump_enabled_p ())
6986 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6987
6988 return true;
6989 }
6990 }
6991 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6992 && node == NULL
6993 && pattern_stmt
6994 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6995 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6996 {
6997 /* Analyze PATTERN_STMT too. */
6998 if (dump_enabled_p ())
6999 {
7000 dump_printf_loc (MSG_NOTE, vect_location,
7001 "==> examining pattern statement: ");
7002 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7003 dump_printf (MSG_NOTE, "\n");
7004 }
7005
7006 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7007 return false;
7008 }
7009
7010 if (is_pattern_stmt_p (stmt_info)
7011 && node == NULL
7012 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7013 {
7014 gimple_stmt_iterator si;
7015
7016 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7017 {
7018 gimple pattern_def_stmt = gsi_stmt (si);
7019 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7020 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7021 {
7022 /* Analyze def stmt of STMT if it's a pattern stmt. */
7023 if (dump_enabled_p ())
7024 {
7025 dump_printf_loc (MSG_NOTE, vect_location,
7026 "==> examining pattern def statement: ");
7027 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7028 dump_printf (MSG_NOTE, "\n");
7029 }
7030
7031 if (!vect_analyze_stmt (pattern_def_stmt,
7032 need_to_vectorize, node))
7033 return false;
7034 }
7035 }
7036 }
7037
7038 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7039 {
7040 case vect_internal_def:
7041 break;
7042
7043 case vect_reduction_def:
7044 case vect_nested_cycle:
7045 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7046 || relevance == vect_used_in_outer_by_reduction
7047 || relevance == vect_unused_in_scope));
7048 break;
7049
7050 case vect_induction_def:
7051 case vect_constant_def:
7052 case vect_external_def:
7053 case vect_unknown_def_type:
7054 default:
7055 gcc_unreachable ();
7056 }
7057
7058 if (bb_vinfo)
7059 {
7060 gcc_assert (PURE_SLP_STMT (stmt_info));
7061
7062 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7063 if (dump_enabled_p ())
7064 {
7065 dump_printf_loc (MSG_NOTE, vect_location,
7066 "get vectype for scalar type: ");
7067 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7068 dump_printf (MSG_NOTE, "\n");
7069 }
7070
7071 vectype = get_vectype_for_scalar_type (scalar_type);
7072 if (!vectype)
7073 {
7074 if (dump_enabled_p ())
7075 {
7076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7077 "not SLPed: unsupported data-type ");
7078 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7079 scalar_type);
7080 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7081 }
7082 return false;
7083 }
7084
7085 if (dump_enabled_p ())
7086 {
7087 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7088 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7089 dump_printf (MSG_NOTE, "\n");
7090 }
7091
7092 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7093 }
7094
7095 if (STMT_VINFO_RELEVANT_P (stmt_info))
7096 {
7097 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7098 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7099 || (is_gimple_call (stmt)
7100 && gimple_call_lhs (stmt) == NULL_TREE));
7101 *need_to_vectorize = true;
7102 }
7103
7104 ok = true;
7105 if (!bb_vinfo
7106 && (STMT_VINFO_RELEVANT_P (stmt_info)
7107 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7108 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7109 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7110 || vectorizable_shift (stmt, NULL, NULL, NULL)
7111 || vectorizable_operation (stmt, NULL, NULL, NULL)
7112 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7113 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7114 || vectorizable_call (stmt, NULL, NULL, NULL)
7115 || vectorizable_store (stmt, NULL, NULL, NULL)
7116 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7117 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7118 else
7119 {
7120 if (bb_vinfo)
7121 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7122 || vectorizable_conversion (stmt, NULL, NULL, node)
7123 || vectorizable_shift (stmt, NULL, NULL, node)
7124 || vectorizable_operation (stmt, NULL, NULL, node)
7125 || vectorizable_assignment (stmt, NULL, NULL, node)
7126 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7127 || vectorizable_call (stmt, NULL, NULL, node)
7128 || vectorizable_store (stmt, NULL, NULL, node)
7129 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7130 }
7131
7132 if (!ok)
7133 {
7134 if (dump_enabled_p ())
7135 {
7136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7137 "not vectorized: relevant stmt not ");
7138 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7139 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7140 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7141 }
7142
7143 return false;
7144 }
7145
7146 if (bb_vinfo)
7147 return true;
7148
7149 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7150 need extra handling, except for vectorizable reductions. */
7151 if (STMT_VINFO_LIVE_P (stmt_info)
7152 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7153 ok = vectorizable_live_operation (stmt, NULL, NULL);
7154
7155 if (!ok)
7156 {
7157 if (dump_enabled_p ())
7158 {
7159 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7160 "not vectorized: live stmt not ");
7161 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7162 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7163 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7164 }
7165
7166 return false;
7167 }
7168
7169 return true;
7170 }
7171
7172
7173 /* Function vect_transform_stmt.
7174
7175 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7176
7177 bool
7178 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7179 bool *grouped_store, slp_tree slp_node,
7180 slp_instance slp_node_instance)
7181 {
7182 bool is_store = false;
7183 gimple vec_stmt = NULL;
7184 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7185 bool done;
7186
7187 switch (STMT_VINFO_TYPE (stmt_info))
7188 {
7189 case type_demotion_vec_info_type:
7190 case type_promotion_vec_info_type:
7191 case type_conversion_vec_info_type:
7192 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7193 gcc_assert (done);
7194 break;
7195
7196 case induc_vec_info_type:
7197 gcc_assert (!slp_node);
7198 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7199 gcc_assert (done);
7200 break;
7201
7202 case shift_vec_info_type:
7203 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7204 gcc_assert (done);
7205 break;
7206
7207 case op_vec_info_type:
7208 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7209 gcc_assert (done);
7210 break;
7211
7212 case assignment_vec_info_type:
7213 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7214 gcc_assert (done);
7215 break;
7216
7217 case load_vec_info_type:
7218 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7219 slp_node_instance);
7220 gcc_assert (done);
7221 break;
7222
7223 case store_vec_info_type:
7224 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7225 gcc_assert (done);
7226 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7227 {
7228 /* In case of interleaving, the whole chain is vectorized when the
7229 last store in the chain is reached. Store stmts before the last
7230 one are skipped, and their vec_stmt_info shouldn't be freed
7231 meanwhile. */
7232 *grouped_store = true;
7233 if (STMT_VINFO_VEC_STMT (stmt_info))
7234 is_store = true;
7235 }
7236 else
7237 is_store = true;
7238 break;
7239
7240 case condition_vec_info_type:
7241 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7242 gcc_assert (done);
7243 break;
7244
7245 case call_vec_info_type:
7246 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7247 stmt = gsi_stmt (*gsi);
7248 if (is_gimple_call (stmt)
7249 && gimple_call_internal_p (stmt)
7250 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7251 is_store = true;
7252 break;
7253
7254 case call_simd_clone_vec_info_type:
7255 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7256 stmt = gsi_stmt (*gsi);
7257 break;
7258
7259 case reduc_vec_info_type:
7260 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7261 gcc_assert (done);
7262 break;
7263
7264 default:
7265 if (!STMT_VINFO_LIVE_P (stmt_info))
7266 {
7267 if (dump_enabled_p ())
7268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7269 "stmt not supported.\n");
7270 gcc_unreachable ();
7271 }
7272 }
7273
7274 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7275 is being vectorized, but outside the immediately enclosing loop. */
7276 if (vec_stmt
7277 && STMT_VINFO_LOOP_VINFO (stmt_info)
7278 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7279 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7280 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7281 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7282 || STMT_VINFO_RELEVANT (stmt_info) ==
7283 vect_used_in_outer_by_reduction))
7284 {
7285 struct loop *innerloop = LOOP_VINFO_LOOP (
7286 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7287 imm_use_iterator imm_iter;
7288 use_operand_p use_p;
7289 tree scalar_dest;
7290 gimple exit_phi;
7291
7292 if (dump_enabled_p ())
7293 dump_printf_loc (MSG_NOTE, vect_location,
7294 "Record the vdef for outer-loop vectorization.\n");
7295
7296 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7297 (to be used when vectorizing outer-loop stmts that use the DEF of
7298 STMT). */
7299 if (gimple_code (stmt) == GIMPLE_PHI)
7300 scalar_dest = PHI_RESULT (stmt);
7301 else
7302 scalar_dest = gimple_assign_lhs (stmt);
7303
7304 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7305 {
7306 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7307 {
7308 exit_phi = USE_STMT (use_p);
7309 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7310 }
7311 }
7312 }
7313
7314 /* Handle stmts whose DEF is used outside the loop-nest that is
7315 being vectorized. */
7316 if (STMT_VINFO_LIVE_P (stmt_info)
7317 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7318 {
7319 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7320 gcc_assert (done);
7321 }
7322
7323 if (vec_stmt)
7324 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7325
7326 return is_store;
7327 }
7328
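/* Illustrative sketch, not part of GCC: how a transformation driver might
   invoke vect_transform_stmt on each statement of a block.  The real
   drivers in the loop and SLP vectorizers additionally deal with pattern
   stmts, grouped stores and SLP instances, so the simplifications below
   are assumptions made for the example only.  Guarded by #if 0 so it does
   not affect the build.  */
#if 0
static void
example_transform_block (basic_block bb)
{
  gimple_stmt_iterator gsi;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      bool grouped_store = false;

      /* No SLP node or SLP instance in this sketch.  */
      bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
					   NULL, NULL);
      if (is_store)
	{
	  /* A real driver would now remove the dead scalar store(s);
	     see vect_remove_stores below for the grouped case.  */
	}
    }
}
#endif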
7329
7330 /* Remove a group of stores (for SLP or interleaving), free their
7331 stmt_vec_info. */
7332
7333 void
7334 vect_remove_stores (gimple first_stmt)
7335 {
7336 gimple next = first_stmt;
7337 gimple tmp;
7338 gimple_stmt_iterator next_si;
7339
7340 while (next)
7341 {
7342 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7343
7344 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7345 if (is_pattern_stmt_p (stmt_info))
7346 next = STMT_VINFO_RELATED_STMT (stmt_info);
7347 /* Free the attached stmt_vec_info and remove the stmt. */
7348 next_si = gsi_for_stmt (next);
7349 unlink_stmt_vdef (next);
7350 gsi_remove (&next_si, true);
7351 release_defs (next);
7352 free_stmt_vec_info (next);
7353 next = tmp;
7354 }
7355 }
7356
7357
7358 /* Function new_stmt_vec_info.
7359
7360 Create and initialize a new stmt_vec_info struct for STMT. */
7361
7362 stmt_vec_info
7363 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7364 bb_vec_info bb_vinfo)
7365 {
7366 stmt_vec_info res;
7367 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7368
7369 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7370 STMT_VINFO_STMT (res) = stmt;
7371 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7372 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7373 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7374 STMT_VINFO_LIVE_P (res) = false;
7375 STMT_VINFO_VECTYPE (res) = NULL;
7376 STMT_VINFO_VEC_STMT (res) = NULL;
7377 STMT_VINFO_VECTORIZABLE (res) = true;
7378 STMT_VINFO_IN_PATTERN_P (res) = false;
7379 STMT_VINFO_RELATED_STMT (res) = NULL;
7380 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7381 STMT_VINFO_DATA_REF (res) = NULL;
7382
7383 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7384 STMT_VINFO_DR_OFFSET (res) = NULL;
7385 STMT_VINFO_DR_INIT (res) = NULL;
7386 STMT_VINFO_DR_STEP (res) = NULL;
7387 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7388
7389 if (gimple_code (stmt) == GIMPLE_PHI
7390 && is_loop_header_bb_p (gimple_bb (stmt)))
7391 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7392 else
7393 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7394
7395 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7396 STMT_SLP_TYPE (res) = loop_vect;
7397 GROUP_FIRST_ELEMENT (res) = NULL;
7398 GROUP_NEXT_ELEMENT (res) = NULL;
7399 GROUP_SIZE (res) = 0;
7400 GROUP_STORE_COUNT (res) = 0;
7401 GROUP_GAP (res) = 0;
7402 GROUP_SAME_DR_STMT (res) = NULL;
7403
7404 return res;
7405 }
7406
7407
7408 /* Create the global vector of stmt_vec_infos. */
7409
7410 void
7411 init_stmt_vec_info_vec (void)
7412 {
7413 gcc_assert (!stmt_vec_info_vec.exists ());
7414 stmt_vec_info_vec.create (50);
7415 }
7416
7417
7418 /* Free the global vector of stmt_vec_infos. */
7419
7420 void
7421 free_stmt_vec_info_vec (void)
7422 {
7423 unsigned int i;
7424 vec_void_p info;
7425 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7426 if (info != NULL)
7427 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7428 gcc_assert (stmt_vec_info_vec.exists ());
7429 stmt_vec_info_vec.release ();
7430 }
7431
7432
7433 /* Free stmt vectorization related info. */
7434
7435 void
7436 free_stmt_vec_info (gimple stmt)
7437 {
7438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7439
7440 if (!stmt_info)
7441 return;
7442
7443 /* Check if this statement has a related "pattern stmt"
7444 (introduced by the vectorizer during the pattern recognition
7445 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7446 too. */
7447 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7448 {
7449 stmt_vec_info patt_info
7450 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7451 if (patt_info)
7452 {
7453 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7454 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7455 gimple_set_bb (patt_stmt, NULL);
7456 tree lhs = gimple_get_lhs (patt_stmt);
7457 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7458 release_ssa_name (lhs);
7459 if (seq)
7460 {
7461 gimple_stmt_iterator si;
7462 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7463 {
7464 gimple seq_stmt = gsi_stmt (si);
7465 gimple_set_bb (seq_stmt, NULL);
7466 lhs = gimple_get_lhs (seq_stmt);
7467 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7468 release_ssa_name (lhs);
7469 free_stmt_vec_info (seq_stmt);
7470 }
7471 }
7472 free_stmt_vec_info (patt_stmt);
7473 }
7474 }
7475
7476 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7477 set_vinfo_for_stmt (stmt, NULL);
7478 free (stmt_info);
7479 }
7480
7481
7482 /* Function get_vectype_for_scalar_type_and_size.
7483
7484 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7485 by the target. */
7486
7487 static tree
7488 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7489 {
7490 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
7491 enum machine_mode simd_mode;
7492 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7493 int nunits;
7494 tree vectype;
7495
7496 if (nbytes == 0)
7497 return NULL_TREE;
7498
7499 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7500 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7501 return NULL_TREE;
7502
7503 /* For vector types of elements whose mode precision doesn't
7504 match their type's precision, we use an element type of mode
7505 precision. The vectorization routines will have to make sure
7506 they support the proper result truncation/extension.
7507 We also make sure to build vector types with INTEGER_TYPE
7508 component type only. */
7509 if (INTEGRAL_TYPE_P (scalar_type)
7510 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7511 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7512 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7513 TYPE_UNSIGNED (scalar_type));
7514
7515 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7516 When the component mode passes the above test simply use a type
7517 corresponding to that mode. The theory is that any use that
7518 would cause problems with this will disable vectorization anyway. */
7519 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7520 && !INTEGRAL_TYPE_P (scalar_type))
7521 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7522
7523 /* We can't build a vector type of elements with alignment bigger than
7524 their size. */
7525 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7526 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7527 TYPE_UNSIGNED (scalar_type));
7528
7529 /* If we fell back to using the mode, fail if there was
7530 no scalar type for it. */
7531 if (scalar_type == NULL_TREE)
7532 return NULL_TREE;
7533
7534 /* If no size was supplied use the mode the target prefers. Otherwise
7535 lookup a vector mode of the specified size. */
7536 if (size == 0)
7537 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7538 else
7539 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7540 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7541 if (nunits <= 1)
7542 return NULL_TREE;
7543
7544 vectype = build_vector_type (scalar_type, nunits);
7545
7546 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7547 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7548 return NULL_TREE;
7549
7550 return vectype;
7551 }
7552
7553 unsigned int current_vector_size;
7554
7555 /* Function get_vectype_for_scalar_type.
7556
7557 Returns the vector type corresponding to SCALAR_TYPE as supported
7558 by the target. */
7559
7560 tree
7561 get_vectype_for_scalar_type (tree scalar_type)
7562 {
7563 tree vectype;
7564 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7565 current_vector_size);
7566 if (vectype
7567 && current_vector_size == 0)
7568 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7569 return vectype;
7570 }
7571
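/* Illustrative sketch, not part of GCC: querying the vector type for a
   scalar type.  The concrete outcome is target dependent; the 16-byte
   vector mentioned below is only an assumed example (e.g. V4SI for
   'int' with SSE-style vectors).  Guarded by #if 0 so it does not
   affect the build.  */
#if 0
static void
example_query_vectype (void)
{
  /* 'int' is a plausible scalar type; any supported scalar works.  */
  tree vectype = get_vectype_for_scalar_type (integer_type_node);

  if (vectype)
    {
      /* Number of elements per vector, e.g. 4 for a 16-byte vector of
	 32-bit ints.  */
      unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
      (void) nunits;
    }
  /* A NULL_TREE result means no suitable vector mode exists on the
     target for this scalar type (see
     get_vectype_for_scalar_type_and_size above).  */
}
#endif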
7572 /* Function get_same_sized_vectype
7573
7574 Returns a vector type corresponding to SCALAR_TYPE with the same
7575 size as VECTOR_TYPE, if supported by the target. */
7576
7577 tree
7578 get_same_sized_vectype (tree scalar_type, tree vector_type)
7579 {
7580 return get_vectype_for_scalar_type_and_size
7581 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7582 }
7583
7584 /* Function vect_is_simple_use.
7585
7586 Input:
7587 LOOP_VINFO - the vect info of the loop that is being vectorized.
7588 BB_VINFO - the vect info of the basic block that is being vectorized.
7589 OPERAND - operand of STMT in the loop or bb.
7590 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7591
7592 Returns whether a stmt with OPERAND can be vectorized.
7593 For loops, supportable operands are constants, loop invariants, and operands
7594 that are defined by the current iteration of the loop. Unsupportable
7595 operands are those that are defined by a previous iteration of the loop (as
7596 is the case in reduction/induction computations).
7597 For basic blocks, supportable operands are constants and bb invariants.
7598 For now, operands defined outside the basic block are not supported. */
7599
7600 bool
7601 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7602 bb_vec_info bb_vinfo, gimple *def_stmt,
7603 tree *def, enum vect_def_type *dt)
7604 {
7605 basic_block bb;
7606 stmt_vec_info stmt_vinfo;
7607 struct loop *loop = NULL;
7608
7609 if (loop_vinfo)
7610 loop = LOOP_VINFO_LOOP (loop_vinfo);
7611
7612 *def_stmt = NULL;
7613 *def = NULL_TREE;
7614
7615 if (dump_enabled_p ())
7616 {
7617 dump_printf_loc (MSG_NOTE, vect_location,
7618 "vect_is_simple_use: operand ");
7619 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7620 dump_printf (MSG_NOTE, "\n");
7621 }
7622
7623 if (CONSTANT_CLASS_P (operand))
7624 {
7625 *dt = vect_constant_def;
7626 return true;
7627 }
7628
7629 if (is_gimple_min_invariant (operand))
7630 {
7631 *def = operand;
7632 *dt = vect_external_def;
7633 return true;
7634 }
7635
7636 if (TREE_CODE (operand) == PAREN_EXPR)
7637 {
7638 if (dump_enabled_p ())
7639 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7640 operand = TREE_OPERAND (operand, 0);
7641 }
7642
7643 if (TREE_CODE (operand) != SSA_NAME)
7644 {
7645 if (dump_enabled_p ())
7646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7647 "not ssa-name.\n");
7648 return false;
7649 }
7650
7651 *def_stmt = SSA_NAME_DEF_STMT (operand);
7652 if (*def_stmt == NULL)
7653 {
7654 if (dump_enabled_p ())
7655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7656 "no def_stmt.\n");
7657 return false;
7658 }
7659
7660 if (dump_enabled_p ())
7661 {
7662 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7664 dump_printf (MSG_NOTE, "\n");
7665 }
7666
7667 /* An empty stmt is expected only in the case of a function argument.
7668 (Otherwise we expect a phi_node or a GIMPLE_ASSIGN.) */
7669 if (gimple_nop_p (*def_stmt))
7670 {
7671 *def = operand;
7672 *dt = vect_external_def;
7673 return true;
7674 }
7675
7676 bb = gimple_bb (*def_stmt);
7677
7678 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7679 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7680 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7681 *dt = vect_external_def;
7682 else
7683 {
7684 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7685 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7686 }
7687
7688 if (*dt == vect_unknown_def_type
7689 || (stmt
7690 && *dt == vect_double_reduction_def
7691 && gimple_code (stmt) != GIMPLE_PHI))
7692 {
7693 if (dump_enabled_p ())
7694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7695 "Unsupported pattern.\n");
7696 return false;
7697 }
7698
7699 if (dump_enabled_p ())
7700 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7701
7702 switch (gimple_code (*def_stmt))
7703 {
7704 case GIMPLE_PHI:
7705 *def = gimple_phi_result (*def_stmt);
7706 break;
7707
7708 case GIMPLE_ASSIGN:
7709 *def = gimple_assign_lhs (*def_stmt);
7710 break;
7711
7712 case GIMPLE_CALL:
7713 *def = gimple_call_lhs (*def_stmt);
7714 if (*def != NULL)
7715 break;
7716 /* FALLTHRU */
7717 default:
7718 if (dump_enabled_p ())
7719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7720 "unsupported defining stmt:\n");
7721 return false;
7722 }
7723
7724 return true;
7725 }
7726
7727 /* Function vect_is_simple_use_1.
7728
7729 Same as vect_is_simple_use but also determines the vector operand
7730 type of OPERAND and stores it to *VECTYPE. If the definition of
7731 OPERAND is vect_uninitialized_def, vect_constant_def or
7732 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7733 is responsible for computing the best suited vector type for the
7734 scalar operand. */
7735
7736 bool
7737 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7738 bb_vec_info bb_vinfo, gimple *def_stmt,
7739 tree *def, enum vect_def_type *dt, tree *vectype)
7740 {
7741 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7742 def, dt))
7743 return false;
7744
7745 /* Now get a vector type if the def is internal, otherwise supply
7746 NULL_TREE and leave it up to the caller to figure out a proper
7747 type for the use stmt. */
7748 if (*dt == vect_internal_def
7749 || *dt == vect_induction_def
7750 || *dt == vect_reduction_def
7751 || *dt == vect_double_reduction_def
7752 || *dt == vect_nested_cycle)
7753 {
7754 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7755
7756 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7757 && !STMT_VINFO_RELEVANT (stmt_info)
7758 && !STMT_VINFO_LIVE_P (stmt_info))
7759 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7760
7761 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7762 gcc_assert (*vectype != NULL_TREE);
7763 }
7764 else if (*dt == vect_uninitialized_def
7765 || *dt == vect_constant_def
7766 || *dt == vect_external_def)
7767 *vectype = NULL_TREE;
7768 else
7769 gcc_unreachable ();
7770
7771 return true;
7772 }
7773
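/* Illustrative sketch, not part of GCC: classifying an operand with
   vect_is_simple_use_1.  Taking the first rhs operand of an assignment
   and passing a loop_vec_info (rather than a bb_vec_info) are
   assumptions made for the example; the real callers are the
   vectorizable_* routines above.  Guarded by #if 0 so it does not
   affect the build.  */
#if 0
static bool
example_classify_operand (gimple stmt, loop_vec_info loop_vinfo)
{
  tree op = gimple_assign_rhs1 (stmt);
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  tree vectype;

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, NULL,
			     &def_stmt, &def, &dt, &vectype))
    /* The operand is defined in a way the vectorizer cannot handle.  */
    return false;

  /* For constants and external (loop-invariant) defs VECTYPE is
     NULL_TREE and the caller chooses a vector type itself; for internal
     defs it is the vector type recorded on the defining statement.  */
  return true;
}
#endif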
7774
7775 /* Function supportable_widening_operation
7776
7777 Check whether an operation represented by the code CODE is a
7778 widening operation that is supported by the target platform in
7779 vector form (i.e., when operating on arguments of type VECTYPE_IN
7780 producing a result of type VECTYPE_OUT).
7781
7782 Widening operations we currently support are NOP (CONVERT), FLOAT
7783 and WIDEN_MULT. This function checks if these operations are supported
7784 by the target platform either directly (via vector tree-codes), or via
7785 target builtins.
7786
7787 Output:
7788 - CODE1 and CODE2 are codes of vector operations to be used when
7789 vectorizing the operation, if available.
7790 - MULTI_STEP_CVT determines the number of required intermediate steps in
7791 case of multi-step conversion (like char->short->int - in that case
7792 MULTI_STEP_CVT will be 1).
7793 - INTERM_TYPES contains the intermediate type required to perform the
7794 widening operation (short in the above example). */
7795
7796 bool
7797 supportable_widening_operation (enum tree_code code, gimple stmt,
7798 tree vectype_out, tree vectype_in,
7799 enum tree_code *code1, enum tree_code *code2,
7800 int *multi_step_cvt,
7801 vec<tree> *interm_types)
7802 {
7803 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7804 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7805 struct loop *vect_loop = NULL;
7806 enum machine_mode vec_mode;
7807 enum insn_code icode1, icode2;
7808 optab optab1, optab2;
7809 tree vectype = vectype_in;
7810 tree wide_vectype = vectype_out;
7811 enum tree_code c1, c2;
7812 int i;
7813 tree prev_type, intermediate_type;
7814 enum machine_mode intermediate_mode, prev_mode;
7815 optab optab3, optab4;
7816
7817 *multi_step_cvt = 0;
7818 if (loop_info)
7819 vect_loop = LOOP_VINFO_LOOP (loop_info);
7820
7821 switch (code)
7822 {
7823 case WIDEN_MULT_EXPR:
7824 /* The result of a vectorized widening operation usually requires
7825 two vectors (because the widened results do not fit into one vector).
7826 The generated vector results would normally be expected to be
7827 generated in the same order as in the original scalar computation,
7828 i.e. if 8 results are generated in each vector iteration, they are
7829 to be organized as follows:
7830 vect1: [res1,res2,res3,res4],
7831 vect2: [res5,res6,res7,res8].
7832
7833 However, in the special case that the result of the widening
7834 operation is used in a reduction computation only, the order doesn't
7835 matter (because when vectorizing a reduction we change the order of
7836 the computation). Some targets can take advantage of this and
7837 generate more efficient code. For example, targets like Altivec,
7838 that support widen_mult using a sequence of {mult_even,mult_odd}
7839 generate the following vectors:
7840 vect1: [res1,res3,res5,res7],
7841 vect2: [res2,res4,res6,res8].
7842
7843 When vectorizing outer-loops, we execute the inner-loop sequentially
7844 (each vectorized inner-loop iteration contributes to VF outer-loop
7845 iterations in parallel). We therefore don't allow changing the
7846 order of the computation in the inner-loop during outer-loop
7847 vectorization. */
7848 /* TODO: Another case in which order doesn't *really* matter is when we
7849 widen and then contract again, e.g. (short)((int)x * y >> 8).
7850 Normally, pack_trunc performs an even/odd permute, whereas the
7851 repack from an even/odd expansion would be an interleave, which
7852 would be significantly simpler for e.g. AVX2. */
7853 /* In any case, in order to avoid duplicating the code below, recurse
7854 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7855 are properly set up for the caller. If we fail, we'll continue with
7856 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7857 if (vect_loop
7858 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7859 && !nested_in_vect_loop_p (vect_loop, stmt)
7860 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7861 stmt, vectype_out, vectype_in,
7862 code1, code2, multi_step_cvt,
7863 interm_types))
7864 {
7865 /* Elements in a vector with vect_used_by_reduction property cannot
7866 be reordered if the use chain with this property does not have the
7867 same operation. One such example is s += a * b, where elements
7868 in a and b cannot be reordered. Here we check if the vector defined
7869 by STMT is only directly used in the reduction statement. */
7870 tree lhs = gimple_assign_lhs (stmt);
7871 use_operand_p dummy;
7872 gimple use_stmt;
7873 stmt_vec_info use_stmt_info = NULL;
7874 if (single_imm_use (lhs, &dummy, &use_stmt)
7875 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7876 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7877 return true;
7878 }
7879 c1 = VEC_WIDEN_MULT_LO_EXPR;
7880 c2 = VEC_WIDEN_MULT_HI_EXPR;
7881 break;
7882
7883 case VEC_WIDEN_MULT_EVEN_EXPR:
7884 /* Support the recursion induced just above. */
7885 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7886 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7887 break;
7888
7889 case WIDEN_LSHIFT_EXPR:
7890 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7891 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7892 break;
7893
7894 CASE_CONVERT:
7895 c1 = VEC_UNPACK_LO_EXPR;
7896 c2 = VEC_UNPACK_HI_EXPR;
7897 break;
7898
7899 case FLOAT_EXPR:
7900 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7901 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7902 break;
7903
7904 case FIX_TRUNC_EXPR:
7905 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7906 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7907 computing the operation. */
7908 return false;
7909
7910 default:
7911 gcc_unreachable ();
7912 }
7913
7914 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7915 {
7916 enum tree_code ctmp = c1;
7917 c1 = c2;
7918 c2 = ctmp;
7919 }
7920
7921 if (code == FIX_TRUNC_EXPR)
7922 {
7923 /* The signedness is determined from output operand. */
7924 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7925 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7926 }
7927 else
7928 {
7929 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7930 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7931 }
7932
7933 if (!optab1 || !optab2)
7934 return false;
7935
7936 vec_mode = TYPE_MODE (vectype);
7937 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7938 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7939 return false;
7940
7941 *code1 = c1;
7942 *code2 = c2;
7943
7944 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7945 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7946 return true;
7947
7948 /* Check if it's a multi-step conversion that can be done using intermediate
7949 types. */
7950
7951 prev_type = vectype;
7952 prev_mode = vec_mode;
7953
7954 if (!CONVERT_EXPR_CODE_P (code))
7955 return false;
7956
7957 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7958 intermediate steps in the promotion sequence. We try
7959 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7960 not. */
7961 interm_types->create (MAX_INTERM_CVT_STEPS);
7962 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7963 {
7964 intermediate_mode = insn_data[icode1].operand[0].mode;
7965 intermediate_type
7966 = lang_hooks.types.type_for_mode (intermediate_mode,
7967 TYPE_UNSIGNED (prev_type));
7968 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7969 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7970
7971 if (!optab3 || !optab4
7972 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7973 || insn_data[icode1].operand[0].mode != intermediate_mode
7974 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7975 || insn_data[icode2].operand[0].mode != intermediate_mode
7976 || ((icode1 = optab_handler (optab3, intermediate_mode))
7977 == CODE_FOR_nothing)
7978 || ((icode2 = optab_handler (optab4, intermediate_mode))
7979 == CODE_FOR_nothing))
7980 break;
7981
7982 interm_types->quick_push (intermediate_type);
7983 (*multi_step_cvt)++;
7984
7985 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7986 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7987 return true;
7988
7989 prev_type = intermediate_type;
7990 prev_mode = intermediate_mode;
7991 }
7992
7993 interm_types->release ();
7994 return false;
7995 }
7996
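/* Illustrative sketch, not part of GCC: asking whether a widening
   conversion can be vectorized, possibly through intermediate types.
   Using NOP_EXPR and an already-known input/output vectype pair are
   assumptions made for the example; vectorizable_conversion is the
   real consumer of this interface.  Guarded by #if 0 so it does not
   affect the build.  */
#if 0
static bool
example_check_widening (gimple stmt, tree vectype_out, tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (!supportable_widening_operation (NOP_EXPR, stmt,
				       vectype_out, vectype_in,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types))
    return false;

  /* multi_step_cvt == 0: one lo/hi (or even/odd) stmt pair suffices.
     multi_step_cvt == 1: e.g. char->short->int, with the intermediate
     (short) vector type recorded in interm_types.  */
  interm_types.release ();
  return true;
}
#endif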
7997
7998 /* Function supportable_narrowing_operation
7999
8000 Check whether an operation represented by the code CODE is a
8001 narrowing operation that is supported by the target platform in
8002 vector form (i.e., when operating on arguments of type VECTYPE_IN
8003 and producing a result of type VECTYPE_OUT).
8004
8005 Narrowing operations we currently support are NOP (CONVERT) and
8006 FIX_TRUNC. This function checks if these operations are supported by
8007 the target platform directly via vector tree-codes.
8008
8009 Output:
8010 - CODE1 is the code of a vector operation to be used when
8011 vectorizing the operation, if available.
8012 - MULTI_STEP_CVT determines the number of required intermediate steps in
8013 case of multi-step conversion (like int->short->char - in that case
8014 MULTI_STEP_CVT will be 1).
8015 - INTERM_TYPES contains the intermediate type required to perform the
8016 narrowing operation (short in the above example). */
8017
8018 bool
8019 supportable_narrowing_operation (enum tree_code code,
8020 tree vectype_out, tree vectype_in,
8021 enum tree_code *code1, int *multi_step_cvt,
8022 vec<tree> *interm_types)
8023 {
8024 enum machine_mode vec_mode;
8025 enum insn_code icode1;
8026 optab optab1, interm_optab;
8027 tree vectype = vectype_in;
8028 tree narrow_vectype = vectype_out;
8029 enum tree_code c1;
8030 tree intermediate_type;
8031 enum machine_mode intermediate_mode, prev_mode;
8032 int i;
8033 bool uns;
8034
8035 *multi_step_cvt = 0;
8036 switch (code)
8037 {
8038 CASE_CONVERT:
8039 c1 = VEC_PACK_TRUNC_EXPR;
8040 break;
8041
8042 case FIX_TRUNC_EXPR:
8043 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8044 break;
8045
8046 case FLOAT_EXPR:
8047 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8048 tree code and optabs used for computing the operation. */
8049 return false;
8050
8051 default:
8052 gcc_unreachable ();
8053 }
8054
8055 if (code == FIX_TRUNC_EXPR)
8056 /* The signedness is determined from output operand. */
8057 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8058 else
8059 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8060
8061 if (!optab1)
8062 return false;
8063
8064 vec_mode = TYPE_MODE (vectype);
8065 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8066 return false;
8067
8068 *code1 = c1;
8069
8070 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8071 return true;
8072
8073 /* Check if it's a multi-step conversion that can be done using intermediate
8074 types. */
8075 prev_mode = vec_mode;
8076 if (code == FIX_TRUNC_EXPR)
8077 uns = TYPE_UNSIGNED (vectype_out);
8078 else
8079 uns = TYPE_UNSIGNED (vectype);
8080
8081 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8082 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8083 costly than signed. */
8084 if (code == FIX_TRUNC_EXPR && uns)
8085 {
8086 enum insn_code icode2;
8087
8088 intermediate_type
8089 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8090 interm_optab
8091 = optab_for_tree_code (c1, intermediate_type, optab_default);
8092 if (interm_optab != unknown_optab
8093 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8094 && insn_data[icode1].operand[0].mode
8095 == insn_data[icode2].operand[0].mode)
8096 {
8097 uns = false;
8098 optab1 = interm_optab;
8099 icode1 = icode2;
8100 }
8101 }
8102
8103 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8104 intermediate steps in the narrowing sequence. We try
8105 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8106 interm_types->create (MAX_INTERM_CVT_STEPS);
8107 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8108 {
8109 intermediate_mode = insn_data[icode1].operand[0].mode;
8110 intermediate_type
8111 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8112 interm_optab
8113 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8114 optab_default);
8115 if (!interm_optab
8116 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8117 || insn_data[icode1].operand[0].mode != intermediate_mode
8118 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8119 == CODE_FOR_nothing))
8120 break;
8121
8122 interm_types->quick_push (intermediate_type);
8123 (*multi_step_cvt)++;
8124
8125 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8126 return true;
8127
8128 prev_mode = intermediate_mode;
8129 optab1 = interm_optab;
8130 }
8131
8132 interm_types->release ();
8133 return false;
8134 }
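
/* Illustrative sketch, not part of GCC: the narrowing counterpart of the
   widening example above.  Only one tree code comes back because two
   input vectors are packed into a single narrower result vector; the
   int->char scenario mentioned below is an assumption for illustration.
   Guarded by #if 0 so it does not affect the build.  */
#if 0
static bool
example_check_narrowing (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code1, &multi_step_cvt,
					&interm_types))
    return false;

  /* For int->char two VEC_PACK_TRUNC steps are typically needed, so
     multi_step_cvt would be 1 with a short vector type in
     interm_types.  */
  interm_types.release ();
  return true;
}
#endif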