/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
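
/* Illustrative sketch only: a caller costing two vector stores during
   analysis might do

     unsigned c = record_stmt_cost (&body_cost_vec, 2, vector_store,
                                    stmt_info, 0, vect_body);

   which queues both entries in BODY_COST_VEC and returns a preliminary
   estimate; passing a NULL vector instead hands the cost straight to
   the target's add_stmt_cost hook.  */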

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
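
/* For illustration only: around a load-lanes instruction these helpers
   produce GIMPLE along the lines of

     vect_array = LOAD_LANES (...);
     vx.0_1 = vect_array[0];	       <-- read_vector_array
     vx.1_2 = vect_array[1];

   while a store-lanes sequence fills the array first:

     vect_array[0] = vx.0_1;	       <-- write_vector_array
     vect_array[1] = vx.1_2;
     STORE_LANES (..., vect_array);  */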

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);
          /* This use is outside the pattern; if LHS also has uses that are
             pattern uses, we should mark the stmt itself, and not the
             pattern stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}
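
/* For illustration only: if the pattern recognizer replaced

     S1: a_1 = x_2 * y_3;

   by a pattern statement such as

     S1': a_1' = WIDEN_MULT_EXPR <x_2, y_3>;

   then marking S1 as relevant marks S1' instead, unless a_1 also has
   uses outside the pattern, in which case S1 itself is marked.  */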

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the loop exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
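
/* For example (illustrative only), in

     for (i = 0; i < N; i++)
       {
         a[i] = b[i] + x;	<-- has a vdef: relevant
         s_1 = s_0 + b[i];	<-- s_1 used after the loop: live
       }

   the store is marked vect_used_in_scope, while the definition of s_1
   gets *live_p set because its only out-of-loop use is in the loop
   exit phi.  */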

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop-closed SSA form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}


/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is in one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
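
/* For illustration only: given the store

     a[i_1] = x_2;

   this returns true for USE == x_2 (the stored value) and false for
   USE == i_1, which only indexes the array; for the load

     x_3 = a[i_1];

   it returns false for i_1 as well, since the LHS is an SSA_NAME.  */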


/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                     vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
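
/* For illustration only: when stmt 2 above is marked relevant, the
   worklist walk below reaches T0's definition (stmt 1) via process_use,
   which sees that T0 is used only to index a[] and leaves stmt 1
   unmarked.  */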

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process worklist.  */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
        case vect_reduction_def:
          switch (tmp_relevant)
            {
            case vect_unused_in_scope:
              relevant = vect_used_by_reduction;
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of reduction.");
              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        case vect_nested_cycle:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_in_outer_by_reduction
              && tmp_relevant != vect_used_in_outer)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of nested cycle.");

              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        case vect_double_reduction_def:
          if (tmp_relevant != vect_unused_in_scope
              && tmp_relevant != vect_used_by_reduction)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported use of double reduction.");

              worklist.release ();
              return false;
            }

          live_p = false;
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              worklist.release ();
              return false;
            }
        }
    } /* while worklist */

  worklist.release ();
  return true;
}


/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}


/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
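
/* Worked example (illustrative only): a two-step promotion (PWR == 1)
   is costed below as vect_pow2 (1) + vect_pow2 (2) == 2 + 4
   vec_promote_demote operations, while the corresponding two-step
   demotion is costed as vect_pow2 (0) + vect_pow2 (1) == 1 + 2.  */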

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
            (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
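
/* For illustration only: for a group of three interleaved stores
   S1, S2, S3 with S1 as GROUP_FIRST_ELEMENT, this returns 3 for S1
   and 1 for S2 and S3, so the group overhead is charged only once.  */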


/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
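
/* Worked example (illustrative only): interleaving a group of 4 stores
   with ncopies == 1 above costs
     ncopies * exact_log2 (group_size) * group_size == 1 * 2 * 4 == 8
   vec_perm operations, on top of the stores themselves.  */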


/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
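
/* Worked example (illustrative only): a strided load of a V4SI vector
   with ncopies == 2 is costed above as 2 * 4 == 8 scalar_load
   operations plus 2 vec_construct operations to assemble the
   vectors.  */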


/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   a vector type, a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
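
/* For illustration only: called with a scalar VAL of 5 and a V4SI TYPE,
   this emits roughly

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (GSI being NULL) and returns the new SSA
   name.  */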

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}


/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}


/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of the reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}


/* Function vect_finish_stmt_generation.

   Insert the vectorized statement VEC_STMT before iterator *GSI, copying
   virtual operands from the statement at *GSI as needed, and create a
   stmt_vec_info for it.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized with input vector type VECTYPE_IN and
   output vector type VECTYPE_OUT.  Returns a function declaration if the
   target has a vectorized version of the function, or NULL_TREE if the
   function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
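
/* For illustration only: given a call to the sqrt builtin with V2DF in
   and out, a target's hook might return the declaration of a vector
   square-root builtin; which functions map to what is entirely
   target-defined.  */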

/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma, and zero arguments is not useful either.  */
  if (nargs == 0 || nargs > 3)
    return false;

  /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic.  */
1759 if (gimple_call_internal_p (stmt)
1760 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1761 {
1762 nargs = 0;
1763 rhs_type = unsigned_type_node;
1764 }
1765
1766 for (i = 0; i < nargs; i++)
1767 {
1768 tree opvectype;
1769
1770 op = gimple_call_arg (stmt, i);
1771
1772 /* We can only handle calls with arguments of the same type. */
1773 if (rhs_type
1774 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1775 {
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "argument types differ.");
1779 return false;
1780 }
1781 if (!rhs_type)
1782 rhs_type = TREE_TYPE (op);
1783
1784 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1785 &def_stmt, &def, &dt[i], &opvectype))
1786 {
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "use not simple.");
1790 return false;
1791 }
1792
1793 if (!vectype_in)
1794 vectype_in = opvectype;
1795 else if (opvectype
1796 && opvectype != vectype_in)
1797 {
1798 if (dump_enabled_p ())
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1800 "argument vector types differ.");
1801 return false;
1802 }
1803 }
1804 /* If all arguments are external or constant defs, use a vector type with
1805 the same size as the output vector type. */
1806 if (!vectype_in)
1807 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1808 if (vec_stmt)
1809 gcc_assert (vectype_in);
1810 if (!vectype_in)
1811 {
1812 if (dump_enabled_p ())
1813 {
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1815 "no vectype for scalar type ");
1816 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1817 }
1818
1819 return false;
1820 }
1821
1822 /* FORNOW */
1823 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1824 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1825 if (nunits_in == nunits_out / 2)
1826 modifier = NARROW;
1827 else if (nunits_out == nunits_in)
1828 modifier = NONE;
1829 else if (nunits_out == nunits_in / 2)
1830 modifier = WIDEN;
1831 else
1832 return false;
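
  /* Illustration of the modifier selection above (types are examples
     only): with vectype_in == V2DF and vectype_out == V4SI, nunits_in
     == 2 == nunits_out / 2, so the call NARROWs (two input vectors
     feed one output vector).  With vectype_in == V4SF and vectype_out
     == V2DF, nunits_out == 2 == nunits_in / 2, so the call WIDENs.  */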
1833
1834 /* For now, we only vectorize functions if a target specific builtin
1835 is available. TODO -- in some cases, it might be profitable to
1836 insert the calls for pieces of the vector, in order to be able
1837 to vectorize other operations in the loop. */
1838 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1839 if (fndecl == NULL_TREE)
1840 {
1841 if (gimple_call_internal_p (stmt)
1842 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1843 && !slp_node
1844 && loop_vinfo
1845 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1846 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
1847 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1848 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
1849 {
1850 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1851 { 0, 1, 2, ... vf - 1 } vector. */
1852 gcc_assert (nargs == 0);
1853 }
1854 else
1855 {
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858 "function is not vectorizable.");
1859 return false;
1860 }
1861 }
1862
1863 gcc_assert (!gimple_vuse (stmt));
1864
1865 if (slp_node || PURE_SLP_STMT (stmt_info))
1866 ncopies = 1;
1867 else if (modifier == NARROW)
1868 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1869 else
1870 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1871
1872 /* Sanity check: make sure that at least one copy of the vectorized stmt
1873 needs to be generated. */
1874 gcc_assert (ncopies >= 1);
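
  /* Illustration (numbers are examples): with a vectorization factor
     of 8 and V4SF input and output vectors (modifier == NONE),
     ncopies == 8 / 4 == 2, i.e. two vectorized calls are generated.
     A NARROWing call with nunits_out == 4 also gets ncopies == 2, but
     each copy consumes two input vectors per argument.  */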
1875
1876 if (!vec_stmt) /* transformation not required. */
1877 {
1878 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1879 if (dump_enabled_p ())
1880 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1881 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1882 return true;
1883 }
1884
1885 /** Transform. **/
1886
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1889
1890 /* Handle def. */
1891 scalar_dest = gimple_call_lhs (stmt);
1892 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1893
1894 prev_stmt_info = NULL;
1895 switch (modifier)
1896 {
1897 case NONE:
1898 for (j = 0; j < ncopies; ++j)
1899 {
1900 /* Build argument list for the vectorized call. */
1901 if (j == 0)
1902 vargs.create (nargs);
1903 else
1904 vargs.truncate (0);
1905
1906 if (slp_node)
1907 {
1908 vec<vec<tree> > vec_defs;
1909 vec_defs.create (nargs);
1910 vec<tree> vec_oprnds0;
1911
1912 for (i = 0; i < nargs; i++)
1913 vargs.quick_push (gimple_call_arg (stmt, i));
1914 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1915 vec_oprnds0 = vec_defs[0];
1916
1917 /* Arguments are ready. Create the new vector stmt. */
1918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1919 {
1920 size_t k;
1921 for (k = 0; k < nargs; k++)
1922 {
1923 vec<tree> vec_oprndsk = vec_defs[k];
1924 vargs[k] = vec_oprndsk[i];
1925 }
1926 new_stmt = gimple_build_call_vec (fndecl, vargs);
1927 new_temp = make_ssa_name (vec_dest, new_stmt);
1928 gimple_call_set_lhs (new_stmt, new_temp);
1929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1931 }
1932
1933 for (i = 0; i < nargs; i++)
1934 {
1935 vec<tree> vec_oprndsi = vec_defs[i];
1936 vec_oprndsi.release ();
1937 }
1938 vec_defs.release ();
1939 continue;
1940 }
1941
1942 for (i = 0; i < nargs; i++)
1943 {
1944 op = gimple_call_arg (stmt, i);
1945 if (j == 0)
1946 vec_oprnd0
1947 = vect_get_vec_def_for_operand (op, stmt, NULL);
1948 else
1949 {
1950 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1951 vec_oprnd0
1952 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1953 }
1954
1955 vargs.quick_push (vec_oprnd0);
1956 }
1957
1958 if (gimple_call_internal_p (stmt)
1959 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1960 {
1961 tree *v = XALLOCAVEC (tree, nunits_out);
1962 int k;
1963 for (k = 0; k < nunits_out; ++k)
1964 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
1965 tree cst = build_vector (vectype_out, v);
1966 tree new_var
1967 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
1968 gimple init_stmt = gimple_build_assign (new_var, cst);
1969 new_temp = make_ssa_name (new_var, init_stmt);
1970 gimple_assign_set_lhs (init_stmt, new_temp);
1971 vect_init_vector_1 (stmt, init_stmt, NULL);
1972 new_temp = make_ssa_name (vec_dest, NULL);
1973 new_stmt = gimple_build_assign (new_temp,
1974 gimple_assign_lhs (init_stmt));
1975 }
1976 else
1977 {
1978 new_stmt = gimple_build_call_vec (fndecl, vargs);
1979 new_temp = make_ssa_name (vec_dest, new_stmt);
1980 gimple_call_set_lhs (new_stmt, new_temp);
1981 }
1982 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1983
1984 if (j == 0)
1985 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1986 else
1987 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1988
1989 prev_stmt_info = vinfo_for_stmt (new_stmt);
1990 }
1991
1992 break;
1993
1994 case NARROW:
1995 for (j = 0; j < ncopies; ++j)
1996 {
1997 /* Build argument list for the vectorized call. */
1998 if (j == 0)
1999 vargs.create (nargs * 2);
2000 else
2001 vargs.truncate (0);
2002
2003 if (slp_node)
2004 {
2005 vec<vec<tree> > vec_defs;
2006 vec_defs.create (nargs);
2007 vec<tree> vec_oprnds0;
2008
2009 for (i = 0; i < nargs; i++)
2010 vargs.quick_push (gimple_call_arg (stmt, i));
2011 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2012 vec_oprnds0 = vec_defs[0];
2013
2014 /* Arguments are ready. Create the new vector stmt. */
2015 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2016 {
2017 size_t k;
2018 vargs.truncate (0);
2019 for (k = 0; k < nargs; k++)
2020 {
2021 vec<tree> vec_oprndsk = vec_defs[k];
2022 vargs.quick_push (vec_oprndsk[i]);
2023 vargs.quick_push (vec_oprndsk[i + 1]);
2024 }
2025 new_stmt = gimple_build_call_vec (fndecl, vargs);
2026 new_temp = make_ssa_name (vec_dest, new_stmt);
2027 gimple_call_set_lhs (new_stmt, new_temp);
2028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2029 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2030 }
2031
2032 for (i = 0; i < nargs; i++)
2033 {
2034 vec<tree> vec_oprndsi = vec_defs[i];
2035 vec_oprndsi.release ();
2036 }
2037 vec_defs.release ();
2038 continue;
2039 }
2040
2041 for (i = 0; i < nargs; i++)
2042 {
2043 op = gimple_call_arg (stmt, i);
2044 if (j == 0)
2045 {
2046 vec_oprnd0
2047 = vect_get_vec_def_for_operand (op, stmt, NULL);
2048 vec_oprnd1
2049 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2050 }
2051 else
2052 {
2053 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2054 vec_oprnd0
2055 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2056 vec_oprnd1
2057 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2058 }
2059
2060 vargs.quick_push (vec_oprnd0);
2061 vargs.quick_push (vec_oprnd1);
2062 }
2063
2064 new_stmt = gimple_build_call_vec (fndecl, vargs);
2065 new_temp = make_ssa_name (vec_dest, new_stmt);
2066 gimple_call_set_lhs (new_stmt, new_temp);
2067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2068
2069 if (j == 0)
2070 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2071 else
2072 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2073
2074 prev_stmt_info = vinfo_for_stmt (new_stmt);
2075 }
2076
2077 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2078
2079 break;
2080
2081 case WIDEN:
2082 /* No current target implements this case. */
2083 return false;
2084 }
2085
2086 vargs.release ();
2087
2088 /* Update the exception handling table with the vector stmt if necessary. */
2089 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2090 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2091
2092 /* The call in STMT might prevent it from being removed in dce.
2093 We cannot, however, remove it here, because the ssa name
2094 it defines is mapped to the new definition. So just replace the
2095 rhs of the statement with something harmless. */
2096
2097 if (slp_node)
2098 return true;
2099
2100 type = TREE_TYPE (scalar_dest);
2101 if (is_pattern_stmt_p (stmt_info))
2102 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2103 else
2104 lhs = gimple_call_lhs (stmt);
2105 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2106 set_vinfo_for_stmt (new_stmt, stmt_info);
2107 set_vinfo_for_stmt (stmt, NULL);
2108 STMT_VINFO_STMT (stmt_info) = new_stmt;
2109 gsi_replace (gsi, new_stmt, false);
2110 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2111
2112 return true;
2113 }
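
/* Illustration (schematic, not actual dump output) of the NONE case of
   vectorizable_call above, for VF == 2 and a hypothetical V2DF sqrt
   builtin VFN:

       before:   x_1 = sqrt (a_2);

       after:    vect_x.1_5 = VFN (vect_a.0_4);
                 x_1 = 0.0;

   The scalar call is not deleted here; its rhs is merely replaced by a
   harmless zero constant, as explained above, and dce cleans it up.  */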
2114
2115
2116 /* Function vect_gen_widened_results_half
2117
2118 Create a vector stmt whose code is CODE, whose number of arguments is
2119 OP_TYPE, and whose result variable is VEC_DEST; its arguments are
2120 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2121 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2122 needs to be created (DECL is a function-decl of a target-builtin).
2123 STMT is the original scalar stmt that we are vectorizing. */
2124
2125 static gimple
2126 vect_gen_widened_results_half (enum tree_code code,
2127 tree decl,
2128 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2129 tree vec_dest, gimple_stmt_iterator *gsi,
2130 gimple stmt)
2131 {
2132 gimple new_stmt;
2133 tree new_temp;
2134
2135 /* Generate half of the widened result: */
2136 if (code == CALL_EXPR)
2137 {
2138 /* Target specific support */
2139 if (op_type == binary_op)
2140 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2141 else
2142 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2143 new_temp = make_ssa_name (vec_dest, new_stmt);
2144 gimple_call_set_lhs (new_stmt, new_temp);
2145 }
2146 else
2147 {
2148 /* Generic support */
2149 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2150 if (op_type != binary_op)
2151 vec_oprnd1 = NULL;
2152 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2153 vec_oprnd1);
2154 new_temp = make_ssa_name (vec_dest, new_stmt);
2155 gimple_assign_set_lhs (new_stmt, new_temp);
2156 }
2157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2158
2159 return new_stmt;
2160 }
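
/* Illustration (codes and types are examples): widening V8HI operands
   to V4SI results takes two calls to vect_gen_widened_results_half,
   one per half:

       lo_5 = VEC_UNPACK_LO_EXPR <vx_3>;    <-- generated with CODE1
       hi_6 = VEC_UNPACK_HI_EXPR <vx_3>;    <-- generated with CODE2

   together covering all eight input elements.  */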
2161
2162
2163 /* Get vectorized definitions for loop-based vectorization. For the first
2164 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2165 scalar operand), and for the rest we get a copy with
2166 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2167 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2168 The vectors are collected into VEC_OPRNDS. */
2169
2170 static void
2171 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2172 vec<tree> *vec_oprnds, int multi_step_cvt)
2173 {
2174 tree vec_oprnd;
2175
2176 /* Get the first vector operand. */
2177 /* All the vector operands except the very first one (which is the scalar
2178 operand) are stmt copies. */
2179 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2180 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2181 else
2182 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2183
2184 vec_oprnds->quick_push (vec_oprnd);
2185
2186 /* Get second vector operand. */
2187 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2188 vec_oprnds->quick_push (vec_oprnd);
2189
2190 *oprnd = vec_oprnd;
2191
2192 /* For conversion in multiple steps, continue to get operands
2193 recursively. */
2194 if (multi_step_cvt)
2195 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2196 }
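
/* Illustration: each invocation above pushes two vector defs and then
   recurses MULTI_STEP_CVT more times, so

       vect_get_loop_based_defs (&oprnd, stmt, dt, &vec_oprnds, 1);

   collects 2 * (1 + 1) == 4 defs in VEC_OPRNDS; in general a call
   collects 2 * (MULTI_STEP_CVT + 1) defs.  */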
2197
2198
2199 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2200 For multi-step conversions store the resulting vectors and call the function
2201 recursively. */
2202
2203 static void
2204 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2205 int multi_step_cvt, gimple stmt,
2206 vec<tree> vec_dsts,
2207 gimple_stmt_iterator *gsi,
2208 slp_tree slp_node, enum tree_code code,
2209 stmt_vec_info *prev_stmt_info)
2210 {
2211 unsigned int i;
2212 tree vop0, vop1, new_tmp, vec_dest;
2213 gimple new_stmt;
2214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2215
2216 vec_dest = vec_dsts.pop ();
2217
2218 for (i = 0; i < vec_oprnds->length (); i += 2)
2219 {
2220 /* Create demotion operation. */
2221 vop0 = (*vec_oprnds)[i];
2222 vop1 = (*vec_oprnds)[i + 1];
2223 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2224 new_tmp = make_ssa_name (vec_dest, new_stmt);
2225 gimple_assign_set_lhs (new_stmt, new_tmp);
2226 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2227
2228 if (multi_step_cvt)
2229 /* Store the resulting vector for next recursive call. */
2230 (*vec_oprnds)[i/2] = new_tmp;
2231 else
2232 {
2233 /* This is the last step of the conversion sequence. Store the
2234 vectors in SLP_NODE or in vector info of the scalar statement
2235 (or in STMT_VINFO_RELATED_STMT chain). */
2236 if (slp_node)
2237 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2238 else
2239 {
2240 if (!*prev_stmt_info)
2241 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2242 else
2243 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2244
2245 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2246 }
2247 }
2248 }
2249
2250 /* For multi-step demotion operations we first generate demotion operations
2251 from the source type to the intermediate types, and then combine the
2252 results (stored in VEC_OPRNDS) with a demotion operation to the destination
2253 type. */
2254 if (multi_step_cvt)
2255 {
2256 /* At each level of recursion we have half of the operands we had at the
2257 previous level. */
2258 vec_oprnds->truncate ((i+1)/2);
2259 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2260 stmt, vec_dsts, gsi, slp_node,
2261 VEC_PACK_TRUNC_EXPR,
2262 prev_stmt_info);
2263 }
2264
2265 vec_dsts.quick_push (vec_dest);
2266 }
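
/* Illustration (types are examples): a multi-step narrowing from int
   to char with MULTI_STEP_CVT == 1 packs the operands pairwise twice,

       4 x V4SI  -->  2 x V8HI  -->  1 x V16QI

   the first step using the CODE passed in by the caller and the
   recursive step using VEC_PACK_TRUNC_EXPR.  */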
2267
2268
2269 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2270 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2271 the resulting vectors and call the function recursively. */
2272
2273 static void
2274 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2275 vec<tree> *vec_oprnds1,
2276 gimple stmt, tree vec_dest,
2277 gimple_stmt_iterator *gsi,
2278 enum tree_code code1,
2279 enum tree_code code2, tree decl1,
2280 tree decl2, int op_type)
2281 {
2282 int i;
2283 tree vop0, vop1, new_tmp1, new_tmp2;
2284 gimple new_stmt1, new_stmt2;
2285 vec<tree> vec_tmp = vNULL;
2286
2287 vec_tmp.create (vec_oprnds0->length () * 2);
2288 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2289 {
2290 if (op_type == binary_op)
2291 vop1 = (*vec_oprnds1)[i];
2292 else
2293 vop1 = NULL_TREE;
2294
2295 /* Generate the two halves of promotion operation. */
2296 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2297 op_type, vec_dest, gsi, stmt);
2298 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2299 op_type, vec_dest, gsi, stmt);
2300 if (is_gimple_call (new_stmt1))
2301 {
2302 new_tmp1 = gimple_call_lhs (new_stmt1);
2303 new_tmp2 = gimple_call_lhs (new_stmt2);
2304 }
2305 else
2306 {
2307 new_tmp1 = gimple_assign_lhs (new_stmt1);
2308 new_tmp2 = gimple_assign_lhs (new_stmt2);
2309 }
2310
2311 /* Store the results for the next step. */
2312 vec_tmp.quick_push (new_tmp1);
2313 vec_tmp.quick_push (new_tmp2);
2314 }
2315
2316 vec_oprnds0->release ();
2317 *vec_oprnds0 = vec_tmp;
2318 }
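
/* Illustration (types are examples): promoting int to long long
   (V4SI -> V2DI) turns every entry of VEC_OPRNDS0 into two results,
   so the operand vector doubles in length on each step:

       { v0, v1 }  -->  { lo(v0), hi(v0), lo(v1), hi(v1) }  */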
2319
2320
2321 /* Check if STMT performs a conversion operation that can be vectorized.
2322 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2323 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2324 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2325
2326 static bool
2327 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2328 gimple *vec_stmt, slp_tree slp_node)
2329 {
2330 tree vec_dest;
2331 tree scalar_dest;
2332 tree op0, op1 = NULL_TREE;
2333 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2334 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2335 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2336 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2337 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2338 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2339 tree new_temp;
2340 tree def;
2341 gimple def_stmt;
2342 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2343 gimple new_stmt = NULL;
2344 stmt_vec_info prev_stmt_info;
2345 int nunits_in;
2346 int nunits_out;
2347 tree vectype_out, vectype_in;
2348 int ncopies, i, j;
2349 tree lhs_type, rhs_type;
2350 enum { NARROW, NONE, WIDEN } modifier;
2351 vec<tree> vec_oprnds0 = vNULL;
2352 vec<tree> vec_oprnds1 = vNULL;
2353 tree vop0;
2354 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2355 int multi_step_cvt = 0;
2356 vec<tree> vec_dsts = vNULL;
2357 vec<tree> interm_types = vNULL;
2358 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2359 int op_type;
2360 enum machine_mode rhs_mode;
2361 unsigned short fltsz;
2362
2363 /* Is STMT a vectorizable conversion? */
2364
2365 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2366 return false;
2367
2368 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2369 return false;
2370
2371 if (!is_gimple_assign (stmt))
2372 return false;
2373
2374 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2375 return false;
2376
2377 code = gimple_assign_rhs_code (stmt);
2378 if (!CONVERT_EXPR_CODE_P (code)
2379 && code != FIX_TRUNC_EXPR
2380 && code != FLOAT_EXPR
2381 && code != WIDEN_MULT_EXPR
2382 && code != WIDEN_LSHIFT_EXPR)
2383 return false;
2384
2385 op_type = TREE_CODE_LENGTH (code);
2386
2387 /* Check types of lhs and rhs. */
2388 scalar_dest = gimple_assign_lhs (stmt);
2389 lhs_type = TREE_TYPE (scalar_dest);
2390 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2391
2392 op0 = gimple_assign_rhs1 (stmt);
2393 rhs_type = TREE_TYPE (op0);
2394
2395 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2396 && !((INTEGRAL_TYPE_P (lhs_type)
2397 && INTEGRAL_TYPE_P (rhs_type))
2398 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2399 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2400 return false;
2401
2402 if ((INTEGRAL_TYPE_P (lhs_type)
2403 && (TYPE_PRECISION (lhs_type)
2404 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2405 || (INTEGRAL_TYPE_P (rhs_type)
2406 && (TYPE_PRECISION (rhs_type)
2407 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2408 {
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "type conversion to/from bit-precision unsupported.");
2412 return false;
2413 }
2414
2415 /* Check the operands of the operation. */
2416 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2417 &def_stmt, &def, &dt[0], &vectype_in))
2418 {
2419 if (dump_enabled_p ())
2420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 "use not simple.");
2422 return false;
2423 }
2424 if (op_type == binary_op)
2425 {
2426 bool ok;
2427
2428 op1 = gimple_assign_rhs2 (stmt);
2429 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2430 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2431 OP1. */
2432 if (CONSTANT_CLASS_P (op0))
2433 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2434 &def_stmt, &def, &dt[1], &vectype_in);
2435 else
2436 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2437 &def, &dt[1]);
2438
2439 if (!ok)
2440 {
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "use not simple.");
2444 return false;
2445 }
2446 }
2447
2448 /* If op0 is an external or constant def, use a vector type of
2449 the same size as the output vector type. */
2450 if (!vectype_in)
2451 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2452 if (vec_stmt)
2453 gcc_assert (vectype_in);
2454 if (!vectype_in)
2455 {
2456 if (dump_enabled_p ())
2457 {
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "no vectype for scalar type ");
2460 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2461 }
2462
2463 return false;
2464 }
2465
2466 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2467 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2468 if (nunits_in < nunits_out)
2469 modifier = NARROW;
2470 else if (nunits_out == nunits_in)
2471 modifier = NONE;
2472 else
2473 modifier = WIDEN;
2474
2475 /* Multiple types in SLP are handled by creating the appropriate number of
2476 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2477 case of SLP. */
2478 if (slp_node || PURE_SLP_STMT (stmt_info))
2479 ncopies = 1;
2480 else if (modifier == NARROW)
2481 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2482 else
2483 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2484
2485 /* Sanity check: make sure that at least one copy of the vectorized stmt
2486 needs to be generated. */
2487 gcc_assert (ncopies >= 1);
2488
2489 /* Supportable by target? */
2490 switch (modifier)
2491 {
2492 case NONE:
2493 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2494 return false;
2495 if (supportable_convert_operation (code, vectype_out, vectype_in,
2496 &decl1, &code1))
2497 break;
2498 /* FALLTHRU */
2499 unsupported:
2500 if (dump_enabled_p ())
2501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502 "conversion not supported by target.");
2503 return false;
2504
2505 case WIDEN:
2506 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2507 &code1, &code2, &multi_step_cvt,
2508 &interm_types))
2509 {
2510 /* A binary widening operation can only be supported directly by the
2511 architecture. */
2512 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2513 break;
2514 }
2515
2516 if (code != FLOAT_EXPR
2517 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2518 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2519 goto unsupported;
2520
2521 rhs_mode = TYPE_MODE (rhs_type);
2522 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2523 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2524 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2525 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2526 {
2527 cvt_type
2528 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2529 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2530 if (cvt_type == NULL_TREE)
2531 goto unsupported;
2532
2533 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2534 {
2535 if (!supportable_convert_operation (code, vectype_out,
2536 cvt_type, &decl1, &codecvt1))
2537 goto unsupported;
2538 }
2539 else if (!supportable_widening_operation (code, stmt, vectype_out,
2540 cvt_type, &codecvt1,
2541 &codecvt2, &multi_step_cvt,
2542 &interm_types))
2543 continue;
2544 else
2545 gcc_assert (multi_step_cvt == 0);
2546
2547 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2548 vectype_in, &code1, &code2,
2549 &multi_step_cvt, &interm_types))
2550 break;
2551 }
2552
2553 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2554 goto unsupported;
2555
2556 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2557 codecvt2 = ERROR_MARK;
2558 else
2559 {
2560 multi_step_cvt++;
2561 interm_types.safe_push (cvt_type);
2562 cvt_type = NULL_TREE;
2563 }
2564 break;
2565
2566 case NARROW:
2567 gcc_assert (op_type == unary_op);
2568 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2569 &code1, &multi_step_cvt,
2570 &interm_types))
2571 break;
2572
2573 if (code != FIX_TRUNC_EXPR
2574 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2575 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2576 goto unsupported;
2577
2578 rhs_mode = TYPE_MODE (rhs_type);
2579 cvt_type
2580 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2581 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2582 if (cvt_type == NULL_TREE)
2583 goto unsupported;
2584 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2585 &decl1, &codecvt1))
2586 goto unsupported;
2587 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2588 &code1, &multi_step_cvt,
2589 &interm_types))
2590 break;
2591 goto unsupported;
2592
2593 default:
2594 gcc_unreachable ();
2595 }
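
  /* Illustration of the multi-step WIDEN case above (types are
     examples): vectorizing (double) s for a short S on a target that
     can only widen short -> int and convert int -> double directly.
     The GET_MODE_2XWIDER_MODE loop settles on cvt_type == int, so

         short  --(code1/code2, NOP_EXPR widening)-->  int
         int    --(codecvt1/codecvt2, FLOAT_EXPR)-->   double

     and CVT_TYPE is pushed as an additional intermediate type.  */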
2596
2597 if (!vec_stmt) /* transformation not required. */
2598 {
2599 if (dump_enabled_p ())
2600 dump_printf_loc (MSG_NOTE, vect_location,
2601 "=== vectorizable_conversion ===");
2602 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2603 {
2604 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2605 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2606 }
2607 else if (modifier == NARROW)
2608 {
2609 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2610 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2611 }
2612 else
2613 {
2614 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2615 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2616 }
2617 interm_types.release ();
2618 return true;
2619 }
2620
2621 /** Transform. **/
2622 if (dump_enabled_p ())
2623 dump_printf_loc (MSG_NOTE, vect_location,
2624 "transform conversion. ncopies = %d.", ncopies);
2625
2626 if (op_type == binary_op)
2627 {
2628 if (CONSTANT_CLASS_P (op0))
2629 op0 = fold_convert (TREE_TYPE (op1), op0);
2630 else if (CONSTANT_CLASS_P (op1))
2631 op1 = fold_convert (TREE_TYPE (op0), op1);
2632 }
2633
2634 /* In case of multi-step conversion, we first generate conversion operations
2635 to the intermediate types, and then from those types to the final one.
2636 We create vector destinations for the intermediate types (TYPES) received
2637 from supportable_*_operation, and store them in the correct order
2638 for future use in vect_create_vectorized_*_stmts (). */
2639 vec_dsts.create (multi_step_cvt + 1);
2640 vec_dest = vect_create_destination_var (scalar_dest,
2641 (cvt_type && modifier == WIDEN)
2642 ? cvt_type : vectype_out);
2643 vec_dsts.quick_push (vec_dest);
2644
2645 if (multi_step_cvt)
2646 {
2647 for (i = interm_types.length () - 1;
2648 interm_types.iterate (i, &intermediate_type); i--)
2649 {
2650 vec_dest = vect_create_destination_var (scalar_dest,
2651 intermediate_type);
2652 vec_dsts.quick_push (vec_dest);
2653 }
2654 }
2655
2656 if (cvt_type)
2657 vec_dest = vect_create_destination_var (scalar_dest,
2658 modifier == WIDEN
2659 ? vectype_out : cvt_type);
2660
2661 if (!slp_node)
2662 {
2663 if (modifier == WIDEN)
2664 {
2665 vec_oprnds0.create (multi_step_cvt ? vect_pow2(multi_step_cvt) : 1);
2666 if (op_type == binary_op)
2667 vec_oprnds1.create (1);
2668 }
2669 else if (modifier == NARROW)
2670 vec_oprnds0.create (
2671 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2672 }
2673 else if (code == WIDEN_LSHIFT_EXPR)
2674 vec_oprnds1.create (slp_node->vec_stmts_size);
2675
2676 last_oprnd = op0;
2677 prev_stmt_info = NULL;
2678 switch (modifier)
2679 {
2680 case NONE:
2681 for (j = 0; j < ncopies; j++)
2682 {
2683 if (j == 0)
2684 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2685 -1);
2686 else
2687 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2688
2689 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2690 {
2691 /* Arguments are ready. Create the new vector stmt. */
2692 if (code1 == CALL_EXPR)
2693 {
2694 new_stmt = gimple_build_call (decl1, 1, vop0);
2695 new_temp = make_ssa_name (vec_dest, new_stmt);
2696 gimple_call_set_lhs (new_stmt, new_temp);
2697 }
2698 else
2699 {
2700 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2701 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2702 vop0, NULL);
2703 new_temp = make_ssa_name (vec_dest, new_stmt);
2704 gimple_assign_set_lhs (new_stmt, new_temp);
2705 }
2706
2707 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2708 if (slp_node)
2709 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2710 }
2711
2712 if (j == 0)
2713 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2714 else
2715 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2716 prev_stmt_info = vinfo_for_stmt (new_stmt);
2717 }
2718 break;
2719
2720 case WIDEN:
2721 /* In case the vectorization factor (VF) is bigger than the number
2722 of elements that we can fit in a vectype (nunits), we have to
2723 generate more than one vector stmt, i.e., we need to "unroll"
2724 the vector stmt by a factor VF/nunits. */
2725 for (j = 0; j < ncopies; j++)
2726 {
2727 /* Handle uses. */
2728 if (j == 0)
2729 {
2730 if (slp_node)
2731 {
2732 if (code == WIDEN_LSHIFT_EXPR)
2733 {
2734 unsigned int k;
2735
2736 vec_oprnd1 = op1;
2737 /* Store vec_oprnd1 for every vector stmt to be created
2738 for SLP_NODE. We check during the analysis that all
2739 the shift arguments are the same. */
2740 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2741 vec_oprnds1.quick_push (vec_oprnd1);
2742
2743 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2744 slp_node, -1);
2745 }
2746 else
2747 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2748 &vec_oprnds1, slp_node, -1);
2749 }
2750 else
2751 {
2752 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2753 vec_oprnds0.quick_push (vec_oprnd0);
2754 if (op_type == binary_op)
2755 {
2756 if (code == WIDEN_LSHIFT_EXPR)
2757 vec_oprnd1 = op1;
2758 else
2759 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2760 NULL);
2761 vec_oprnds1.quick_push (vec_oprnd1);
2762 }
2763 }
2764 }
2765 else
2766 {
2767 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2768 vec_oprnds0.truncate (0);
2769 vec_oprnds0.quick_push (vec_oprnd0);
2770 if (op_type == binary_op)
2771 {
2772 if (code == WIDEN_LSHIFT_EXPR)
2773 vec_oprnd1 = op1;
2774 else
2775 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2776 vec_oprnd1);
2777 vec_oprnds1.truncate (0);
2778 vec_oprnds1.quick_push (vec_oprnd1);
2779 }
2780 }
2781
2782 /* Arguments are ready. Create the new vector stmts. */
2783 for (i = multi_step_cvt; i >= 0; i--)
2784 {
2785 tree this_dest = vec_dsts[i];
2786 enum tree_code c1 = code1, c2 = code2;
2787 if (i == 0 && codecvt2 != ERROR_MARK)
2788 {
2789 c1 = codecvt1;
2790 c2 = codecvt2;
2791 }
2792 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2793 &vec_oprnds1,
2794 stmt, this_dest, gsi,
2795 c1, c2, decl1, decl2,
2796 op_type);
2797 }
2798
2799 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2800 {
2801 if (cvt_type)
2802 {
2803 if (codecvt1 == CALL_EXPR)
2804 {
2805 new_stmt = gimple_build_call (decl1, 1, vop0);
2806 new_temp = make_ssa_name (vec_dest, new_stmt);
2807 gimple_call_set_lhs (new_stmt, new_temp);
2808 }
2809 else
2810 {
2811 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2812 new_temp = make_ssa_name (vec_dest, NULL);
2813 new_stmt = gimple_build_assign_with_ops (codecvt1,
2814 new_temp,
2815 vop0, NULL);
2816 }
2817
2818 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2819 }
2820 else
2821 new_stmt = SSA_NAME_DEF_STMT (vop0);
2822
2823 if (slp_node)
2824 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2825 else
2826 {
2827 if (!prev_stmt_info)
2828 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2829 else
2830 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2831 prev_stmt_info = vinfo_for_stmt (new_stmt);
2832 }
2833 }
2834 }
2835
2836 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2837 break;
2838
2839 case NARROW:
2840 /* In case the vectorization factor (VF) is bigger than the number
2841 of elements that we can fit in a vectype (nunits), we have to
2842 generate more than one vector stmt, i.e., we need to "unroll"
2843 the vector stmt by a factor VF/nunits. */
2844 for (j = 0; j < ncopies; j++)
2845 {
2846 /* Handle uses. */
2847 if (slp_node)
2848 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2849 slp_node, -1);
2850 else
2851 {
2852 vec_oprnds0.truncate (0);
2853 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2854 vect_pow2 (multi_step_cvt) - 1);
2855 }
2856
2857 /* Arguments are ready. Create the new vector stmts. */
2858 if (cvt_type)
2859 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2860 {
2861 if (codecvt1 == CALL_EXPR)
2862 {
2863 new_stmt = gimple_build_call (decl1, 1, vop0);
2864 new_temp = make_ssa_name (vec_dest, new_stmt);
2865 gimple_call_set_lhs (new_stmt, new_temp);
2866 }
2867 else
2868 {
2869 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2870 new_temp = make_ssa_name (vec_dest, NULL);
2871 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2872 vop0, NULL);
2873 }
2874
2875 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2876 vec_oprnds0[i] = new_temp;
2877 }
2878
2879 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2880 stmt, vec_dsts, gsi,
2881 slp_node, code1,
2882 &prev_stmt_info);
2883 }
2884
2885 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2886 break;
2887 }
2888
2889 vec_oprnds0.release ();
2890 vec_oprnds1.release ();
2891 vec_dsts.release ();
2892 interm_types.release ();
2893
2894 return true;
2895 }
2896
2897
2898 /* Function vectorizable_assignment.
2899
2900 Check if STMT performs an assignment (copy) that can be vectorized.
2901 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2902 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2903 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2904
2905 static bool
2906 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2907 gimple *vec_stmt, slp_tree slp_node)
2908 {
2909 tree vec_dest;
2910 tree scalar_dest;
2911 tree op;
2912 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2913 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2914 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2915 tree new_temp;
2916 tree def;
2917 gimple def_stmt;
2918 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2919 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2920 int ncopies;
2921 int i, j;
2922 vec<tree> vec_oprnds = vNULL;
2923 tree vop;
2924 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2925 gimple new_stmt = NULL;
2926 stmt_vec_info prev_stmt_info = NULL;
2927 enum tree_code code;
2928 tree vectype_in;
2929
2930 /* Multiple types in SLP are handled by creating the appropriate number of
2931 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2932 case of SLP. */
2933 if (slp_node || PURE_SLP_STMT (stmt_info))
2934 ncopies = 1;
2935 else
2936 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2937
2938 gcc_assert (ncopies >= 1);
2939
2940 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2941 return false;
2942
2943 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2944 return false;
2945
2946 /* Is STMT a vectorizable assignment? */
2947 if (!is_gimple_assign (stmt))
2948 return false;
2949
2950 scalar_dest = gimple_assign_lhs (stmt);
2951 if (TREE_CODE (scalar_dest) != SSA_NAME)
2952 return false;
2953
2954 code = gimple_assign_rhs_code (stmt);
2955 if (gimple_assign_single_p (stmt)
2956 || code == PAREN_EXPR
2957 || CONVERT_EXPR_CODE_P (code))
2958 op = gimple_assign_rhs1 (stmt);
2959 else
2960 return false;
2961
2962 if (code == VIEW_CONVERT_EXPR)
2963 op = TREE_OPERAND (op, 0);
2964
2965 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2966 &def_stmt, &def, &dt[0], &vectype_in))
2967 {
2968 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2970 "use not simple.");
2971 return false;
2972 }
2973
2974 /* We can handle NOP_EXPR conversions that do not change the number
2975 of elements or the vector size. */
2976 if ((CONVERT_EXPR_CODE_P (code)
2977 || code == VIEW_CONVERT_EXPR)
2978 && (!vectype_in
2979 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2980 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2981 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2982 return false;
2983
2984 /* We do not handle bit-precision changes. */
2985 if ((CONVERT_EXPR_CODE_P (code)
2986 || code == VIEW_CONVERT_EXPR)
2987 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2988 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2989 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2990 || ((TYPE_PRECISION (TREE_TYPE (op))
2991 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2992 /* But a conversion that does not change the bit-pattern is ok. */
2993 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2994 > TYPE_PRECISION (TREE_TYPE (op)))
2995 && TYPE_UNSIGNED (TREE_TYPE (op))))
2996 {
2997 if (dump_enabled_p ())
2998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2999 "type conversion to/from bit-precision "
3000 "unsupported.");
3001 return false;
3002 }
3003
3004 if (!vec_stmt) /* transformation not required. */
3005 {
3006 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3007 if (dump_enabled_p ())
3008 dump_printf_loc (MSG_NOTE, vect_location,
3009 "=== vectorizable_assignment ===");
3010 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3011 return true;
3012 }
3013
3014 /** Transform. **/
3015 if (dump_enabled_p ())
3016 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
3017
3018 /* Handle def. */
3019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3020
3021 /* Handle use. */
3022 for (j = 0; j < ncopies; j++)
3023 {
3024 /* Handle uses. */
3025 if (j == 0)
3026 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
3027 else
3028 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3029
3030 /* Arguments are ready. Create the new vector stmt. */
3031 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3032 {
3033 if (CONVERT_EXPR_CODE_P (code)
3034 || code == VIEW_CONVERT_EXPR)
3035 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
3036 new_stmt = gimple_build_assign (vec_dest, vop);
3037 new_temp = make_ssa_name (vec_dest, new_stmt);
3038 gimple_assign_set_lhs (new_stmt, new_temp);
3039 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3040 if (slp_node)
3041 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3042 }
3043
3044 if (slp_node)
3045 continue;
3046
3047 if (j == 0)
3048 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3049 else
3050 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3051
3052 prev_stmt_info = vinfo_for_stmt (new_stmt);
3053 }
3054
3055 vec_oprnds.release ();
3056 return true;
3057 }
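
/* Illustration (schematic): a sign-changing copy such as

       u_2 = (unsigned int) i_1;

   is vectorized by the function above as a plain copy through a
   VIEW_CONVERT_EXPR, since both vector types have the same size and
   number of elements:

       vect_u.1_7 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect_i.0_6);  */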
3058
3059
3060 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3061 either as a shift by a scalar or as a shift by a vector. */
3062
3063 bool
3064 vect_supportable_shift (enum tree_code code, tree scalar_type)
3065 {
3066
3067 enum machine_mode vec_mode;
3068 optab optab;
3069 int icode;
3070 tree vectype;
3071
3072 vectype = get_vectype_for_scalar_type (scalar_type);
3073 if (!vectype)
3074 return false;
3075
3076 optab = optab_for_tree_code (code, vectype, optab_scalar);
3077 if (!optab
3078 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3079 {
3080 optab = optab_for_tree_code (code, vectype, optab_vector);
3081 if (!optab
3082 || (optab_handler (optab, TYPE_MODE (vectype))
3083 == CODE_FOR_nothing))
3084 return false;
3085 }
3086
3087 vec_mode = TYPE_MODE (vectype);
3088 icode = (int) optab_handler (optab, vec_mode);
3089 if (icode == CODE_FOR_nothing)
3090 return false;
3091
3092 return true;
3093 }
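
/* Illustration (hypothetical caller): pattern recognizers can query
   vect_supportable_shift before rewriting an operation into a shift,
   e.g.

       if (vect_supportable_shift (RSHIFT_EXPR, itype))
         ... replace a division by a power of two with a shift ...  */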
3094
3095
3096 /* Function vectorizable_shift.
3097
3098 Check if STMT performs a shift operation that can be vectorized.
3099 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3100 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3101 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3102
3103 static bool
3104 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3105 gimple *vec_stmt, slp_tree slp_node)
3106 {
3107 tree vec_dest;
3108 tree scalar_dest;
3109 tree op0, op1 = NULL;
3110 tree vec_oprnd1 = NULL_TREE;
3111 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3112 tree vectype;
3113 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3114 enum tree_code code;
3115 enum machine_mode vec_mode;
3116 tree new_temp;
3117 optab optab;
3118 int icode;
3119 enum machine_mode optab_op2_mode;
3120 tree def;
3121 gimple def_stmt;
3122 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3123 gimple new_stmt = NULL;
3124 stmt_vec_info prev_stmt_info;
3125 int nunits_in;
3126 int nunits_out;
3127 tree vectype_out;
3128 tree op1_vectype;
3129 int ncopies;
3130 int j, i;
3131 vec<tree> vec_oprnds0 = vNULL;
3132 vec<tree> vec_oprnds1 = vNULL;
3133 tree vop0, vop1;
3134 unsigned int k;
3135 bool scalar_shift_arg = true;
3136 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3137 int vf;
3138
3139 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3140 return false;
3141
3142 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3143 return false;
3144
3145 /* Is STMT a vectorizable shift operation? */
3146 if (!is_gimple_assign (stmt))
3147 return false;
3148
3149 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3150 return false;
3151
3152 code = gimple_assign_rhs_code (stmt);
3153
3154 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3155 || code == RROTATE_EXPR))
3156 return false;
3157
3158 scalar_dest = gimple_assign_lhs (stmt);
3159 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3160 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3161 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3162 {
3163 if (dump_enabled_p ())
3164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3165 "bit-precision shifts not supported.");
3166 return false;
3167 }
3168
3169 op0 = gimple_assign_rhs1 (stmt);
3170 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3171 &def_stmt, &def, &dt[0], &vectype))
3172 {
3173 if (dump_enabled_p ())
3174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3175 "use not simple.");
3176 return false;
3177 }
3178 /* If op0 is an external or constant def, use a vector type with
3179 the same size as the output vector type. */
3180 if (!vectype)
3181 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3182 if (vec_stmt)
3183 gcc_assert (vectype);
3184 if (!vectype)
3185 {
3186 if (dump_enabled_p ())
3187 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3188 "no vectype for scalar type ");
3189 return false;
3190 }
3191
3192 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3193 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3194 if (nunits_out != nunits_in)
3195 return false;
3196
3197 op1 = gimple_assign_rhs2 (stmt);
3198 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3199 &def, &dt[1], &op1_vectype))
3200 {
3201 if (dump_enabled_p ())
3202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3203 "use not simple.");
3204 return false;
3205 }
3206
3207 if (loop_vinfo)
3208 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3209 else
3210 vf = 1;
3211
3212 /* Multiple types in SLP are handled by creating the appropriate number of
3213 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3214 case of SLP. */
3215 if (slp_node || PURE_SLP_STMT (stmt_info))
3216 ncopies = 1;
3217 else
3218 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3219
3220 gcc_assert (ncopies >= 1);
3221
3222 /* Determine whether the shift amount is a vector or a scalar. If the
3223 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3224
3225 if (dt[1] == vect_internal_def && !slp_node)
3226 scalar_shift_arg = false;
3227 else if (dt[1] == vect_constant_def
3228 || dt[1] == vect_external_def
3229 || dt[1] == vect_internal_def)
3230 {
3231 /* In SLP, we need to check whether the shift count is the same for
3232 all statements; in loops, a constant or invariant count is always
3233 a scalar shift. */
3234 if (slp_node)
3235 {
3236 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3237 gimple slpstmt;
3238
3239 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3240 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3241 scalar_shift_arg = false;
3242 }
3243 }
3244 else
3245 {
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3248 "operand mode requires invariant argument.");
3249 return false;
3250 }
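
  /* Illustration of the choice above: for a[i] = b[i] << c the shift
     count is loop-invariant (dt[1] == vect_external_def), so
     scalar_shift_arg stays true and the vector-shifted-by-scalar optab
     is preferred; for a[i] = b[i] << c[i] the count varies (dt[1] ==
     vect_internal_def) and the vector/vector optab must be used.  */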
3251
3252 /* Vector shifted by vector. */
3253 if (!scalar_shift_arg)
3254 {
3255 optab = optab_for_tree_code (code, vectype, optab_vector);
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_NOTE, vect_location,
3258 "vector/vector shift/rotate found.");
3259
3260 if (!op1_vectype)
3261 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3262 if (op1_vectype == NULL_TREE
3263 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3264 {
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3267 "unusable type for last operand in"
3268 " vector/vector shift/rotate.");
3269 return false;
3270 }
3271 }
3272 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3273 then see if it has a vector-shifted-by-vector insn. */
3274 else
3275 {
3276 optab = optab_for_tree_code (code, vectype, optab_scalar);
3277 if (optab
3278 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3279 {
3280 if (dump_enabled_p ())
3281 dump_printf_loc (MSG_NOTE, vect_location,
3282 "vector/scalar shift/rotate found.");
3283 }
3284 else
3285 {
3286 optab = optab_for_tree_code (code, vectype, optab_vector);
3287 if (optab
3288 && (optab_handler (optab, TYPE_MODE (vectype))
3289 != CODE_FOR_nothing))
3290 {
3291 scalar_shift_arg = false;
3292
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_NOTE, vect_location,
3295 "vector/vector shift/rotate found.");
3296
3297 /* Unlike the other binary operators, shifts/rotates have
3298 the rhs being int, instead of the same type as the lhs,
3299 so make sure the scalar is the right type if we are
3300 dealing with vectors of long long/long/short/char. */
3301 if (dt[1] == vect_constant_def)
3302 op1 = fold_convert (TREE_TYPE (vectype), op1);
3303 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3304 TREE_TYPE (op1)))
3305 {
3306 if (slp_node
3307 && TYPE_MODE (TREE_TYPE (vectype))
3308 != TYPE_MODE (TREE_TYPE (op1)))
3309 {
3310 if (dump_enabled_p ())
3311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3312 "unusable type for last operand in"
3313 " vector/vector shift/rotate.");
3314 return false;
3315 }
3316 if (vec_stmt && !slp_node)
3317 {
3318 op1 = fold_convert (TREE_TYPE (vectype), op1);
3319 op1 = vect_init_vector (stmt, op1,
3320 TREE_TYPE (vectype), NULL);
3321 }
3322 }
3323 }
3324 }
3325 }
3326
3327 /* Supportable by target? */
3328 if (!optab)
3329 {
3330 if (dump_enabled_p ())
3331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3332 "no optab.");
3333 return false;
3334 }
3335 vec_mode = TYPE_MODE (vectype);
3336 icode = (int) optab_handler (optab, vec_mode);
3337 if (icode == CODE_FOR_nothing)
3338 {
3339 if (dump_enabled_p ())
3340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3341 "op not supported by target.");
3342 /* Check only during analysis. */
3343 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3344 || (vf < vect_min_worthwhile_factor (code)
3345 && !vec_stmt))
3346 return false;
3347 if (dump_enabled_p ())
3348 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3349 }
3350
3351 /* Worthwhile without SIMD support? Check only during analysis. */
3352 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3353 && vf < vect_min_worthwhile_factor (code)
3354 && !vec_stmt)
3355 {
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "not worthwhile without SIMD support.");
3359 return false;
3360 }
3361
3362 if (!vec_stmt) /* transformation not required. */
3363 {
3364 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3367 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3368 return true;
3369 }
3370
3371 /** Transform. **/
3372
3373 if (dump_enabled_p ())
3374 dump_printf_loc (MSG_NOTE, vect_location,
3375 "transform binary/unary operation.");
3376
3377 /* Handle def. */
3378 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3379
3380 prev_stmt_info = NULL;
3381 for (j = 0; j < ncopies; j++)
3382 {
3383 /* Handle uses. */
3384 if (j == 0)
3385 {
3386 if (scalar_shift_arg)
3387 {
3388 /* Vector shl and shr insn patterns can be defined with scalar
3389 operand 2 (shift operand). In this case, use constant or loop
3390 invariant op1 directly, without extending it to vector mode
3391 first. */
3392 optab_op2_mode = insn_data[icode].operand[2].mode;
3393 if (!VECTOR_MODE_P (optab_op2_mode))
3394 {
3395 if (dump_enabled_p ())
3396 dump_printf_loc (MSG_NOTE, vect_location,
3397 "operand 1 using scalar mode.");
3398 vec_oprnd1 = op1;
3399 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3400 vec_oprnds1.quick_push (vec_oprnd1);
3401 if (slp_node)
3402 {
3403 /* Store vec_oprnd1 for every vector stmt to be created
3404 for SLP_NODE. We check during the analysis that all
3405 the shift arguments are the same.
3406 TODO: Allow different constants for different vector
3407 stmts generated for an SLP instance. */
3408 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3409 vec_oprnds1.quick_push (vec_oprnd1);
3410 }
3411 }
3412 }
3413
3414 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3415 (a special case for certain kinds of vector shifts); otherwise,
3416 operand 1 should be of a vector type (the usual case). */
3417 if (vec_oprnd1)
3418 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3419 slp_node, -1);
3420 else
3421 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3422 slp_node, -1);
3423 }
3424 else
3425 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3426
3427 /* Arguments are ready. Create the new vector stmt. */
3428 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3429 {
3430 vop1 = vec_oprnds1[i];
3431 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3432 new_temp = make_ssa_name (vec_dest, new_stmt);
3433 gimple_assign_set_lhs (new_stmt, new_temp);
3434 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3435 if (slp_node)
3436 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3437 }
3438
3439 if (slp_node)
3440 continue;
3441
3442 if (j == 0)
3443 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3444 else
3445 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3446 prev_stmt_info = vinfo_for_stmt (new_stmt);
3447 }
3448
3449 vec_oprnds0.release ();
3450 vec_oprnds1.release ();
3451
3452 return true;
3453 }
3454
3455
3456 static tree permute_vec_elements (tree, tree, tree, gimple,
3457 gimple_stmt_iterator *);
3458
3459
3460 /* Function vectorizable_operation.
3461
3462 Check if STMT performs a binary, unary or ternary operation that can
3463 be vectorized.
3464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3465 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3467
3468 static bool
3469 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3470 gimple *vec_stmt, slp_tree slp_node)
3471 {
3472 tree vec_dest;
3473 tree scalar_dest;
3474 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3475 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3476 tree vectype;
3477 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3478 enum tree_code code;
3479 enum machine_mode vec_mode;
3480 tree new_temp;
3481 int op_type;
3482 optab optab;
3483 int icode;
3484 tree def;
3485 gimple def_stmt;
3486 enum vect_def_type dt[3]
3487 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3488 gimple new_stmt = NULL;
3489 stmt_vec_info prev_stmt_info;
3490 int nunits_in;
3491 int nunits_out;
3492 tree vectype_out;
3493 int ncopies;
3494 int j, i;
3495 vec<tree> vec_oprnds0 = vNULL;
3496 vec<tree> vec_oprnds1 = vNULL;
3497 vec<tree> vec_oprnds2 = vNULL;
3498 tree vop0, vop1, vop2;
3499 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3500 int vf;
3501
3502 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3503 return false;
3504
3505 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3506 return false;
3507
3508 /* Is STMT a vectorizable binary/unary operation? */
3509 if (!is_gimple_assign (stmt))
3510 return false;
3511
3512 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3513 return false;
3514
3515 code = gimple_assign_rhs_code (stmt);
3516
3517 /* For pointer addition, we should use the normal plus for
3518 the vector addition. */
3519 if (code == POINTER_PLUS_EXPR)
3520 code = PLUS_EXPR;
3521
3522 /* Support only unary, binary and ternary operations. */
3523 op_type = TREE_CODE_LENGTH (code);
3524 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3525 {
3526 if (dump_enabled_p ())
3527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3528 "num. args = %d (not unary/binary/ternary op).",
3529 op_type);
3530 return false;
3531 }
3532
3533 scalar_dest = gimple_assign_lhs (stmt);
3534 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3535
3536 /* Most operations cannot handle bit-precision types without extra
3537 truncations. */
3538 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3539 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3540 /* Exceptions are the bitwise binary operations. */
3541 && code != BIT_IOR_EXPR
3542 && code != BIT_XOR_EXPR
3543 && code != BIT_AND_EXPR)
3544 {
3545 if (dump_enabled_p ())
3546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3547 "bit-precision arithmetic not supported.");
3548 return false;
3549 }
3550
3551 op0 = gimple_assign_rhs1 (stmt);
3552 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3553 &def_stmt, &def, &dt[0], &vectype))
3554 {
3555 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3557 "use not simple.");
3558 return false;
3559 }
3560 /* If op0 is an external or constant def use a vector type with
3561 the same size as the output vector type. */
3562 if (!vectype)
3563 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3564 if (vec_stmt)
3565 gcc_assert (vectype);
3566 if (!vectype)
3567 {
3568 if (dump_enabled_p ())
3569 {
3570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3571 "no vectype for scalar type ");
3572 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3573 TREE_TYPE (op0));
3574 }
3575
3576 return false;
3577 }
3578
3579 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3580 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3581 if (nunits_out != nunits_in)
3582 return false;
3583
3584 if (op_type == binary_op || op_type == ternary_op)
3585 {
3586 op1 = gimple_assign_rhs2 (stmt);
3587 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3588 &def, &dt[1]))
3589 {
3590 if (dump_enabled_p ())
3591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3592 "use not simple.");
3593 return false;
3594 }
3595 }
3596 if (op_type == ternary_op)
3597 {
3598 op2 = gimple_assign_rhs3 (stmt);
3599 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3600 &def, &dt[2]))
3601 {
3602 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3604 "use not simple.");
3605 return false;
3606 }
3607 }
3608
3609 if (loop_vinfo)
3610 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3611 else
3612 vf = 1;
3613
3614 /* Multiple types in SLP are handled by creating the appropriate number of
3615 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3616 case of SLP. */
3617 if (slp_node || PURE_SLP_STMT (stmt_info))
3618 ncopies = 1;
3619 else
3620 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3621
3622 gcc_assert (ncopies >= 1);
3623
3624 /* Shifts are handled in vectorizable_shift (). */
3625 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3626 || code == RROTATE_EXPR)
3627 return false;
3628
3629 /* Supportable by target? */
3630
3631 vec_mode = TYPE_MODE (vectype);
3632 if (code == MULT_HIGHPART_EXPR)
3633 {
3634 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3635 icode = LAST_INSN_CODE;
3636 else
3637 icode = CODE_FOR_nothing;
3638 }
3639 else
3640 {
3641 optab = optab_for_tree_code (code, vectype, optab_default);
3642 if (!optab)
3643 {
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3646 "no optab.");
3647 return false;
3648 }
3649 icode = (int) optab_handler (optab, vec_mode);
3650 }
3651
3652 if (icode == CODE_FOR_nothing)
3653 {
3654 if (dump_enabled_p ())
3655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3656 "op not supported by target.");
3657 /* Check only during analysis. */
3658 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3659 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3660 return false;
3661 if (dump_enabled_p ())
3662 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3663 }
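/* An unsupported vector operation whose mode fills exactly one word
can still be emulated in a scalar word register; e.g., a bitwise AND
of four chars can be done in one 32-bit word on a 32-bit target.  */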
3664
3665 /* Worthwhile without SIMD support? Check only during analysis. */
3666 if (!VECTOR_MODE_P (vec_mode)
3667 && !vec_stmt
3668 && vf < vect_min_worthwhile_factor (code))
3669 {
3670 if (dump_enabled_p ())
3671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3672 "not worthwhile without SIMD support.");
3673 return false;
3674 }
3675
3676 if (!vec_stmt) /* transformation not required. */
3677 {
3678 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3679 if (dump_enabled_p ())
3680 dump_printf_loc (MSG_NOTE, vect_location,
3681 "=== vectorizable_operation ===");
3682 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3683 return true;
3684 }
3685
3686 /** Transform. **/
3687
3688 if (dump_enabled_p ())
3689 dump_printf_loc (MSG_NOTE, vect_location,
3690 "transform binary/unary operation.");
3691
3692 /* Handle def. */
3693 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3694
3695 /* In case the vectorization factor (VF) is bigger than the number
3696 of elements that we can fit in a vectype (nunits), we have to generate
3697 more than one vector stmt - i.e - we need to "unroll" the
3698 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3699 from one copy of the vector stmt to the next, in the field
3700 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3701 stages to find the correct vector defs to be used when vectorizing
3702 stmts that use the defs of the current stmt. The example below
3703 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3704 we need to create 4 vectorized stmts):
3705
3706 before vectorization:
3707 RELATED_STMT VEC_STMT
3708 S1: x = memref - -
3709 S2: z = x + 1 - -
3710
3711 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3712 there):
3713 RELATED_STMT VEC_STMT
3714 VS1_0: vx0 = memref0 VS1_1 -
3715 VS1_1: vx1 = memref1 VS1_2 -
3716 VS1_2: vx2 = memref2 VS1_3 -
3717 VS1_3: vx3 = memref3 - -
3718 S1: x = load - VS1_0
3719 S2: z = x + 1 - -
3720
3721 step2: vectorize stmt S2 (done here):
3722 To vectorize stmt S2 we first need to find the relevant vector
3723 def for the first operand 'x'. This is, as usual, obtained from
3724 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3725 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3726 relevant vector def 'vx0'. Having found 'vx0' we can generate
3727 the vector stmt VS2_0, and as usual, record it in the
3728 STMT_VINFO_VEC_STMT of stmt S2.
3729 When creating the second copy (VS2_1), we obtain the relevant vector
3730 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3731 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3732 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3733 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3734 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3735 chain of stmts and pointers:
3736 RELATED_STMT VEC_STMT
3737 VS1_0: vx0 = memref0 VS1_1 -
3738 VS1_1: vx1 = memref1 VS1_2 -
3739 VS1_2: vx2 = memref2 VS1_3 -
3740 VS1_3: vx3 = memref3 - -
3741 S1: x = load - VS1_0
3742 VS2_0: vz0 = vx0 + v1 VS2_1 -
3743 VS2_1: vz1 = vx1 + v1 VS2_2 -
3744 VS2_2: vz2 = vx2 + v1 VS2_3 -
3745 VS2_3: vz3 = vx3 + v1 - -
3746 S2: z = x + 1 - VS2_0 */
3747
3748 prev_stmt_info = NULL;
3749 for (j = 0; j < ncopies; j++)
3750 {
3751 /* Handle uses. */
3752 if (j == 0)
3753 {
3754 if (op_type == binary_op || op_type == ternary_op)
3755 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3756 slp_node, -1);
3757 else
3758 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3759 slp_node, -1);
3760 if (op_type == ternary_op)
3761 {
3762 vec_oprnds2.create (1);
3763 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3764 stmt,
3765 NULL));
3766 }
3767 }
3768 else
3769 {
3770 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3771 if (op_type == ternary_op)
3772 {
3773 tree vec_oprnd = vec_oprnds2.pop ();
3774 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3775 vec_oprnd));
3776 }
3777 }
3778
3779 /* Arguments are ready. Create the new vector stmt. */
3780 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3781 {
3782 vop1 = ((op_type == binary_op || op_type == ternary_op)
3783 ? vec_oprnds1[i] : NULL_TREE);
3784 vop2 = ((op_type == ternary_op)
3785 ? vec_oprnds2[i] : NULL_TREE);
3786 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3787 vop0, vop1, vop2);
3788 new_temp = make_ssa_name (vec_dest, new_stmt);
3789 gimple_assign_set_lhs (new_stmt, new_temp);
3790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3791 if (slp_node)
3792 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3793 }
3794
3795 if (slp_node)
3796 continue;
3797
3798 if (j == 0)
3799 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3800 else
3801 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3802 prev_stmt_info = vinfo_for_stmt (new_stmt);
3803 }
3804
3805 vec_oprnds0.release ();
3806 vec_oprnds1.release ();
3807 vec_oprnds2.release ();
3808
3809 return true;
3810 }
3811
3812
3813 /* Function vectorizable_store.
3814
3815 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3816 can be vectorized.
3817 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3818 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3819 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3820
3821 static bool
3822 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3823 slp_tree slp_node)
3824 {
3825 tree scalar_dest;
3826 tree data_ref;
3827 tree op;
3828 tree vec_oprnd = NULL_TREE;
3829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3830 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3831 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3832 tree elem_type;
3833 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3834 struct loop *loop = NULL;
3835 enum machine_mode vec_mode;
3836 tree dummy;
3837 enum dr_alignment_support alignment_support_scheme;
3838 tree def;
3839 gimple def_stmt;
3840 enum vect_def_type dt;
3841 stmt_vec_info prev_stmt_info = NULL;
3842 tree dataref_ptr = NULL_TREE;
3843 tree dataref_offset = NULL_TREE;
3844 gimple ptr_incr = NULL;
3845 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3846 int ncopies;
3847 int j;
3848 gimple next_stmt, first_stmt = NULL;
3849 bool grouped_store = false;
3850 bool store_lanes_p = false;
3851 unsigned int group_size, i;
3852 vec<tree> dr_chain = vNULL;
3853 vec<tree> oprnds = vNULL;
3854 vec<tree> result_chain = vNULL;
3855 bool inv_p;
3856 vec<tree> vec_oprnds = vNULL;
3857 bool slp = (slp_node != NULL);
3858 unsigned int vec_num;
3859 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3860 tree aggr_type;
3861
3862 if (loop_vinfo)
3863 loop = LOOP_VINFO_LOOP (loop_vinfo);
3864
3865 /* Multiple types in SLP are handled by creating the appropriate number of
3866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3867 case of SLP. */
3868 if (slp || PURE_SLP_STMT (stmt_info))
3869 ncopies = 1;
3870 else
3871 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3872
3873 gcc_assert (ncopies >= 1);
3874
3875 /* FORNOW. This restriction should be relaxed. */
3876 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3877 {
3878 if (dump_enabled_p ())
3879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3880 "multiple types in nested loop.");
3881 return false;
3882 }
3883
3884 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3885 return false;
3886
3887 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3888 return false;
3889
3890 /* Is vectorizable store? */
3891
3892 if (!is_gimple_assign (stmt))
3893 return false;
3894
3895 scalar_dest = gimple_assign_lhs (stmt);
3896 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3897 && is_pattern_stmt_p (stmt_info))
3898 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3899 if (TREE_CODE (scalar_dest) != ARRAY_REF
3900 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3901 && TREE_CODE (scalar_dest) != INDIRECT_REF
3902 && TREE_CODE (scalar_dest) != COMPONENT_REF
3903 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3904 && TREE_CODE (scalar_dest) != REALPART_EXPR
3905 && TREE_CODE (scalar_dest) != MEM_REF)
3906 return false;
3907
3908 gcc_assert (gimple_assign_single_p (stmt));
3909 op = gimple_assign_rhs1 (stmt);
3910 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3911 &def, &dt))
3912 {
3913 if (dump_enabled_p ())
3914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3915 "use not simple.");
3916 return false;
3917 }
3918
3919 elem_type = TREE_TYPE (vectype);
3920 vec_mode = TYPE_MODE (vectype);
3921
3922 /* FORNOW. In some cases can vectorize even if data-type not supported
3923 (e.g. - array initialization with 0). */
3924 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3925 return false;
3926
3927 if (!STMT_VINFO_DATA_REF (stmt_info))
3928 return false;
3929
3930 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3931 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3932 size_zero_node) < 0)
3933 {
3934 if (dump_enabled_p ())
3935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3936 "negative step for store.");
3937 return false;
3938 }
3939
3940 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3941 {
3942 grouped_store = true;
3943 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3944 if (!slp && !PURE_SLP_STMT (stmt_info))
3945 {
3946 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3947 if (vect_store_lanes_supported (vectype, group_size))
3948 store_lanes_p = true;
3949 else if (!vect_grouped_store_supported (vectype, group_size))
3950 return false;
3951 }
3952
3953 if (first_stmt == stmt)
3954 {
3955 /* STMT is the leader of the group. Check the operands of all the
3956 stmts of the group. */
3957 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3958 while (next_stmt)
3959 {
3960 gcc_assert (gimple_assign_single_p (next_stmt));
3961 op = gimple_assign_rhs1 (next_stmt);
3962 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3963 &def_stmt, &def, &dt))
3964 {
3965 if (dump_enabled_p ())
3966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3967 "use not simple.");
3968 return false;
3969 }
3970 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3971 }
3972 }
3973 }
3974
3975 if (!vec_stmt) /* transformation not required. */
3976 {
3977 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3978 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3979 NULL, NULL, NULL);
3980 return true;
3981 }
3982
3983 /** Transform. **/
3984
3985 if (grouped_store)
3986 {
3987 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3988 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3989
3990 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3991
3992 /* FORNOW */
3993 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3994
3995 /* We vectorize all the stmts of the interleaving group when we
3996 reach the last stmt in the group. */
3997 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3998 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3999 && !slp)
4000 {
4001 *vec_stmt = NULL;
4002 return true;
4003 }
4004
4005 if (slp)
4006 {
4007 grouped_store = false;
4008 /* VEC_NUM is the number of vect stmts to be created for this
4009 group. */
4010 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4011 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4012 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4013 op = gimple_assign_rhs1 (first_stmt);
4014 }
4015 else
4016 /* VEC_NUM is the number of vect stmts to be created for this
4017 group. */
4018 vec_num = group_size;
4019 }
4020 else
4021 {
4022 first_stmt = stmt;
4023 first_dr = dr;
4024 group_size = vec_num = 1;
4025 }
4026
4027 if (dump_enabled_p ())
4028 dump_printf_loc (MSG_NOTE, vect_location,
4029 "transform store. ncopies = %d", ncopies);
4030
4031 dr_chain.create (group_size);
4032 oprnds.create (group_size);
4033
4034 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4035 gcc_assert (alignment_support_scheme);
4036 /* Targets with store-lane instructions must not require explicit
4037 realignment. */
4038 gcc_assert (!store_lanes_p
4039 || alignment_support_scheme == dr_aligned
4040 || alignment_support_scheme == dr_unaligned_supported);
4041
4042 if (store_lanes_p)
4043 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4044 else
4045 aggr_type = vectype;
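/* E.g., a group of four int stores vectorized with V4SI vectors gives
VEC_NUM * NUNITS = 16, so AGGR_TYPE is int[16], the array written by
a single STORE_LANES internal call.  */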
4046
4047 /* In case the vectorization factor (VF) is bigger than the number
4048 of elements that we can fit in a vectype (nunits), we have to generate
4049 more than one vector stmt - i.e - we need to "unroll" the
4050 vector stmt by a factor VF/nunits. For more details see documentation in
4051 vect_get_vec_def_for_copy_stmt. */
4052
4053 /* In case of interleaving (non-unit grouped access):
4054
4055 S1: &base + 2 = x2
4056 S2: &base = x0
4057 S3: &base + 1 = x1
4058 S4: &base + 3 = x3
4059
4060 We create vectorized stores starting from the base address (the access
4061 of the first stmt in the chain, S2 in the above example) when the last
4062 store stmt of the chain (S4) is reached:
4063
4064 VS1: &base = vx2
4065 VS2: &base + vec_size*1 = vx0
4066 VS3: &base + vec_size*2 = vx1
4067 VS4: &base + vec_size*3 = vx3
4068
4069 Then permutation statements are generated:
4070
4071 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4072 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4073 ...
4074
4075 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4076 (the order of the data-refs in the output of vect_permute_store_chain
4077 corresponds to the order of scalar stmts in the interleaving chain - see
4078 the documentation of vect_permute_store_chain()).
4079
4080 In case of both multiple types and interleaving, the above vector stores and
4081 permutation stmts are created for every copy. The result vector stmts are
4082 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4083 STMT_VINFO_RELATED_STMT for the next copies.
4084 */
4085
4086 prev_stmt_info = NULL;
4087 for (j = 0; j < ncopies; j++)
4088 {
4089 gimple new_stmt;
4090
4091 if (j == 0)
4092 {
4093 if (slp)
4094 {
4095 /* Get vectorized arguments for SLP_NODE. */
4096 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4097 NULL, slp_node, -1);
4098
4099 vec_oprnd = vec_oprnds[0];
4100 }
4101 else
4102 {
4103 /* For interleaved stores we collect vectorized defs for all the
4104 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4105 used as an input to vect_permute_store_chain(), and OPRNDS as
4106 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4107
4108 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4109 OPRNDS are of size 1. */
4110 next_stmt = first_stmt;
4111 for (i = 0; i < group_size; i++)
4112 {
4113 /* Since gaps are not supported for interleaved stores,
4114 GROUP_SIZE is the exact number of stmts in the chain.
4115 Therefore, NEXT_STMT can't be NULL. In case that
4116 there is no interleaving, GROUP_SIZE is 1, and only one
4117 iteration of the loop will be executed. */
4118 gcc_assert (next_stmt
4119 && gimple_assign_single_p (next_stmt));
4120 op = gimple_assign_rhs1 (next_stmt);
4121
4122 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4123 NULL);
4124 dr_chain.quick_push (vec_oprnd);
4125 oprnds.quick_push (vec_oprnd);
4126 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4127 }
4128 }
4129
4130 /* We should have caught mismatched types earlier. */
4131 gcc_assert (useless_type_conversion_p (vectype,
4132 TREE_TYPE (vec_oprnd)));
4133 bool simd_lane_access_p
4134 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4135 if (simd_lane_access_p
4136 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4137 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4138 && integer_zerop (DR_OFFSET (first_dr))
4139 && integer_zerop (DR_INIT (first_dr))
4140 && alias_sets_conflict_p (get_alias_set (aggr_type),
4141 get_alias_set (DR_REF (first_dr))))
4142 {
4143 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4144 dataref_offset = build_int_cst (reference_alias_ptr_type
4145 (DR_REF (first_dr)), 0);
4146 }
4147 else
4148 dataref_ptr
4149 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4150 simd_lane_access_p ? loop : NULL,
4151 NULL_TREE, &dummy, gsi, &ptr_incr,
4152 simd_lane_access_p, &inv_p);
4153 gcc_assert (bb_vinfo || !inv_p);
4154 }
4155 else
4156 {
4157 /* For interleaved stores we created vectorized defs for all the
4158 defs stored in OPRNDS in the previous iteration (previous copy).
4159 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4160 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4161 next copy.
4162 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4163 OPRNDS are of size 1. */
4164 for (i = 0; i < group_size; i++)
4165 {
4166 op = oprnds[i];
4167 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4168 &def, &dt);
4169 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4170 dr_chain[i] = vec_oprnd;
4171 oprnds[i] = vec_oprnd;
4172 }
4173 if (dataref_offset)
4174 dataref_offset
4175 = int_const_binop (PLUS_EXPR, dataref_offset,
4176 TYPE_SIZE_UNIT (aggr_type));
4177 else
4178 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4179 TYPE_SIZE_UNIT (aggr_type));
4180 }
4181
4182 if (store_lanes_p)
4183 {
4184 tree vec_array;
4185
4186 /* Combine all the vectors into an array. */
4187 vec_array = create_vector_array (vectype, vec_num);
4188 for (i = 0; i < vec_num; i++)
4189 {
4190 vec_oprnd = dr_chain[i];
4191 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4192 }
4193
4194 /* Emit:
4195 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4196 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4197 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4198 gimple_call_set_lhs (new_stmt, data_ref);
4199 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4200 }
4201 else
4202 {
4203 new_stmt = NULL;
4204 if (grouped_store)
4205 {
4206 if (j == 0)
4207 result_chain.create (group_size);
4208 /* Permute. */
4209 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4210 &result_chain);
4211 }
4212
4213 next_stmt = first_stmt;
4214 for (i = 0; i < vec_num; i++)
4215 {
4216 unsigned align, misalign;
4217
4218 if (i > 0)
4219 /* Bump the vector pointer. */
4220 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4221 stmt, NULL_TREE);
4222
4223 if (slp)
4224 vec_oprnd = vec_oprnds[i];
4225 else if (grouped_store)
4226 /* For grouped stores vectorized defs are interleaved in
4227 vect_permute_store_chain(). */
4228 vec_oprnd = result_chain[i];
4229
4230 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4231 dataref_offset
4232 ? dataref_offset
4233 : build_int_cst (reference_alias_ptr_type
4234 (DR_REF (first_dr)), 0));
4235 align = TYPE_ALIGN_UNIT (vectype);
4236 if (aligned_access_p (first_dr))
4237 misalign = 0;
4238 else if (DR_MISALIGNMENT (first_dr) == -1)
4239 {
4240 TREE_TYPE (data_ref)
4241 = build_aligned_type (TREE_TYPE (data_ref),
4242 TYPE_ALIGN (elem_type));
4243 align = TYPE_ALIGN_UNIT (elem_type);
4244 misalign = 0;
4245 }
4246 else
4247 {
4248 TREE_TYPE (data_ref)
4249 = build_aligned_type (TREE_TYPE (data_ref),
4250 TYPE_ALIGN (elem_type));
4251 misalign = DR_MISALIGNMENT (first_dr);
4252 }
4253 if (dataref_offset == NULL_TREE)
4254 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4255 misalign);
4256
4257 /* Arguments are ready. Create the new vector stmt. */
4258 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4259 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4260
4261 if (slp)
4262 continue;
4263
4264 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4265 if (!next_stmt)
4266 break;
4267 }
4268 }
4269 if (!slp)
4270 {
4271 if (j == 0)
4272 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4273 else
4274 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4275 prev_stmt_info = vinfo_for_stmt (new_stmt);
4276 }
4277 }
4278
4279 dr_chain.release ();
4280 oprnds.release ();
4281 result_chain.release ();
4282 vec_oprnds.release ();
4283
4284 return true;
4285 }
4286
4287 /* Given a vector type VECTYPE and permutation SEL returns
4288 the VECTOR_CST mask that implements the permutation of the
4289 vector elements. If that is impossible to do, returns NULL. */
4290
4291 tree
4292 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4293 {
4294 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4295 int i, nunits;
4296
4297 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4298
4299 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4300 return NULL;
4301
4302 mask_elt_type = lang_hooks.types.type_for_mode
4303 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4304 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4305
4306 mask_elts = XALLOCAVEC (tree, nunits);
4307 for (i = nunits - 1; i >= 0; i--)
4308 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4309 mask_vec = build_vector (mask_type, mask_elts);
4310
4311 return mask_vec;
4312 }
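
/* For example, for a V4SI VECTYPE and SEL = {3, 2, 1, 0} this returns
the VECTOR_CST {3, 2, 1, 0} of the corresponding integer mask vector
type, or NULL when can_vec_perm_p rejects the permutation.  */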
4313
4314 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4315 reversal of the vector elements. If that is impossible to do,
4316 returns NULL. */
4317
4318 static tree
4319 perm_mask_for_reverse (tree vectype)
4320 {
4321 int i, nunits;
4322 unsigned char *sel;
4323
4324 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4325 sel = XALLOCAVEC (unsigned char, nunits);
4326
4327 for (i = 0; i < nunits; ++i)
4328 sel[i] = nunits - 1 - i;
4329
4330 return vect_gen_perm_mask (vectype, sel);
4331 }
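
/* E.g., for a four-element vector this uses SEL = {3, 2, 1, 0}, the
mask that fully reverses the vector, if the target supports it.  */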
4332
4333 /* Given a vector variable X and Y, that was generated for the scalar
4334 STMT, generate instructions to permute the vector elements of X and Y
4335 using permutation mask MASK_VEC, insert them at *GSI and return the
4336 permuted vector variable. */
4337
4338 static tree
4339 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4340 gimple_stmt_iterator *gsi)
4341 {
4342 tree vectype = TREE_TYPE (x);
4343 tree perm_dest, data_ref;
4344 gimple perm_stmt;
4345
4346 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4347 data_ref = make_ssa_name (perm_dest, NULL);
4348
4349 /* Generate the permute statement. */
4350 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4351 x, y, mask_vec);
4352 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4353
4354 return data_ref;
4355 }
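
/* The generated statement has the form

perm_dest = VEC_PERM_EXPR <x, y, mask_vec>;

selecting elements from the concatenation of X and Y according to the
indices in MASK_VEC.  */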
4356
4357 /* vectorizable_load.
4358
4359 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4360 can be vectorized.
4361 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4362 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4363 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4364
4365 static bool
4366 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4367 slp_tree slp_node, slp_instance slp_node_instance)
4368 {
4369 tree scalar_dest;
4370 tree vec_dest = NULL;
4371 tree data_ref = NULL;
4372 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4373 stmt_vec_info prev_stmt_info;
4374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4375 struct loop *loop = NULL;
4376 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4377 bool nested_in_vect_loop = false;
4378 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4379 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4380 tree elem_type;
4381 tree new_temp;
4382 enum machine_mode mode;
4383 gimple new_stmt = NULL;
4384 tree dummy;
4385 enum dr_alignment_support alignment_support_scheme;
4386 tree dataref_ptr = NULL_TREE;
4387 tree dataref_offset = NULL_TREE;
4388 gimple ptr_incr = NULL;
4389 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4390 int ncopies;
4391 int i, j, group_size, group_gap;
4392 tree msq = NULL_TREE, lsq;
4393 tree offset = NULL_TREE;
4394 tree realignment_token = NULL_TREE;
4395 gimple phi = NULL;
4396 vec<tree> dr_chain = vNULL;
4397 bool grouped_load = false;
4398 bool load_lanes_p = false;
4399 gimple first_stmt;
4400 bool inv_p;
4401 bool negative = false;
4402 bool compute_in_loop = false;
4403 struct loop *at_loop;
4404 int vec_num;
4405 bool slp = (slp_node != NULL);
4406 bool slp_perm = false;
4407 enum tree_code code;
4408 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4409 int vf;
4410 tree aggr_type;
4411 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4412 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4413 int gather_scale = 1;
4414 enum vect_def_type gather_dt = vect_unknown_def_type;
4415
4416 if (loop_vinfo)
4417 {
4418 loop = LOOP_VINFO_LOOP (loop_vinfo);
4419 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4420 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4421 }
4422 else
4423 vf = 1;
4424
4425 /* Multiple types in SLP are handled by creating the appropriate number of
4426 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4427 case of SLP. */
4428 if (slp || PURE_SLP_STMT (stmt_info))
4429 ncopies = 1;
4430 else
4431 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4432
4433 gcc_assert (ncopies >= 1);
4434
4435 /* FORNOW. This restriction should be relaxed. */
4436 if (nested_in_vect_loop && ncopies > 1)
4437 {
4438 if (dump_enabled_p ())
4439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4440 "multiple types in nested loop.");
4441 return false;
4442 }
4443
4444 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4445 return false;
4446
4447 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4448 return false;
4449
4450 /* Is vectorizable load? */
4451 if (!is_gimple_assign (stmt))
4452 return false;
4453
4454 scalar_dest = gimple_assign_lhs (stmt);
4455 if (TREE_CODE (scalar_dest) != SSA_NAME)
4456 return false;
4457
4458 code = gimple_assign_rhs_code (stmt);
4459 if (code != ARRAY_REF
4460 && code != BIT_FIELD_REF
4461 && code != INDIRECT_REF
4462 && code != COMPONENT_REF
4463 && code != IMAGPART_EXPR
4464 && code != REALPART_EXPR
4465 && code != MEM_REF
4466 && TREE_CODE_CLASS (code) != tcc_declaration)
4467 return false;
4468
4469 if (!STMT_VINFO_DATA_REF (stmt_info))
4470 return false;
4471
4472 elem_type = TREE_TYPE (vectype);
4473 mode = TYPE_MODE (vectype);
4474
4475 /* FORNOW. In some cases can vectorize even if data-type not supported
4476 (e.g. - data copies). */
4477 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4478 {
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "Aligned load, but unsupported type.");
4482 return false;
4483 }
4484
4485 /* Check if the load is a part of an interleaving chain. */
4486 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4487 {
4488 grouped_load = true;
4489 /* FORNOW */
4490 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4491
4492 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4493 if (!slp && !PURE_SLP_STMT (stmt_info))
4494 {
4495 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4496 if (vect_load_lanes_supported (vectype, group_size))
4497 load_lanes_p = true;
4498 else if (!vect_grouped_load_supported (vectype, group_size))
4499 return false;
4500 }
4501 }
4502
4504 if (STMT_VINFO_GATHER_P (stmt_info))
4505 {
4506 gimple def_stmt;
4507 tree def;
4508 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4509 &gather_off, &gather_scale);
4510 gcc_assert (gather_decl);
4511 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4512 &def_stmt, &def, &gather_dt,
4513 &gather_off_vectype))
4514 {
4515 if (dump_enabled_p ())
4516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4517 "gather index use not simple.");
4518 return false;
4519 }
4520 }
4521 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4522 ;
4523 else
4524 {
4525 negative = tree_int_cst_compare (nested_in_vect_loop
4526 ? STMT_VINFO_DR_STEP (stmt_info)
4527 : DR_STEP (dr),
4528 size_zero_node) < 0;
4529 if (negative && ncopies > 1)
4530 {
4531 if (dump_enabled_p ())
4532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4533 "multiple types with negative step.");
4534 return false;
4535 }
4536
4537 if (negative)
4538 {
4539 if (grouped_load)
4540 {
4541 if (dump_enabled_p ())
4542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4543 "negative step for group load not supported");
4544 return false;
4545 }
4546 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4547 if (alignment_support_scheme != dr_aligned
4548 && alignment_support_scheme != dr_unaligned_supported)
4549 {
4550 if (dump_enabled_p ())
4551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4552 "negative step but alignment required.");
4553 return false;
4554 }
4555 if (!perm_mask_for_reverse (vectype))
4556 {
4557 if (dump_enabled_p ())
4558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559 "negative step and reversing not supported.");
4560 return false;
4561 }
4562 }
4563 }
4564
4565 if (!vec_stmt) /* transformation not required. */
4566 {
4567 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4568 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4569 return true;
4570 }
4571
4572 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_NOTE, vect_location,
4574 "transform load. ncopies = %d", ncopies);
4575
4576 /** Transform. **/
4577
4578 if (STMT_VINFO_GATHER_P (stmt_info))
4579 {
4580 tree vec_oprnd0 = NULL_TREE, op;
4581 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4582 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4583 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4584 edge pe = loop_preheader_edge (loop);
4585 gimple_seq seq;
4586 basic_block new_bb;
4587 enum { NARROW, NONE, WIDEN } modifier;
4588 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4589
4590 if (nunits == gather_off_nunits)
4591 modifier = NONE;
4592 else if (nunits == gather_off_nunits / 2)
4593 {
4594 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4595 modifier = WIDEN;
4596
4597 for (i = 0; i < gather_off_nunits; ++i)
4598 sel[i] = i | nunits;
4599
4600 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4601 gcc_assert (perm_mask != NULL_TREE);
4602 }
4603 else if (nunits == gather_off_nunits * 2)
4604 {
4605 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4606 modifier = NARROW;
4607
4608 for (i = 0; i < nunits; ++i)
4609 sel[i] = i < gather_off_nunits
4610 ? i : i + nunits - gather_off_nunits;
4611
4612 perm_mask = vect_gen_perm_mask (vectype, sel);
4613 gcc_assert (perm_mask != NULL_TREE);
4614 ncopies *= 2;
4615 }
4616 else
4617 gcc_unreachable ();
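/* E.g., gathering V4DI values with V8SI offsets gives
GATHER_OFF_NUNITS = 8 and NUNITS = 4, so MODIFIER = WIDEN and the
{4, 5, 6, 7, 4, 5, 6, 7} permutation exposes the upper four offsets
to the odd-numbered copies.  */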
4618
4619 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4620 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4621 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4622 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4623 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4624 scaletype = TREE_VALUE (arglist);
4625 gcc_checking_assert (types_compatible_p (srctype, rettype)
4626 && types_compatible_p (srctype, masktype));
4627
4628 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4629
4630 ptr = fold_convert (ptrtype, gather_base);
4631 if (!is_gimple_min_invariant (ptr))
4632 {
4633 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4634 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4635 gcc_assert (!new_bb);
4636 }
4637
4638 /* Currently we support only unconditional gather loads,
4639 so mask should be all ones. */
4640 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4641 mask = build_int_cst (TREE_TYPE (masktype), -1);
4642 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4643 {
4644 REAL_VALUE_TYPE r;
4645 long tmp[6];
4646 for (j = 0; j < 6; ++j)
4647 tmp[j] = -1;
4648 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4649 mask = build_real (TREE_TYPE (masktype), r);
4650 }
4651 else
4652 gcc_unreachable ();
4653 mask = build_vector_from_val (masktype, mask);
4654 mask = vect_init_vector (stmt, mask, masktype, NULL);
4655
4656 scale = build_int_cst (scaletype, gather_scale);
4657
4658 prev_stmt_info = NULL;
4659 for (j = 0; j < ncopies; ++j)
4660 {
4661 if (modifier == WIDEN && (j & 1))
4662 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4663 perm_mask, stmt, gsi);
4664 else if (j == 0)
4665 op = vec_oprnd0
4666 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4667 else
4668 op = vec_oprnd0
4669 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4670
4671 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4672 {
4673 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4674 == TYPE_VECTOR_SUBPARTS (idxtype));
4675 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4676 var = make_ssa_name (var, NULL);
4677 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4678 new_stmt
4679 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4680 op, NULL_TREE);
4681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4682 op = var;
4683 }
4684
4685 new_stmt
4686 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4687
4688 if (!useless_type_conversion_p (vectype, rettype))
4689 {
4690 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4691 == TYPE_VECTOR_SUBPARTS (rettype));
4692 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4693 op = make_ssa_name (var, new_stmt);
4694 gimple_call_set_lhs (new_stmt, op);
4695 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4696 var = make_ssa_name (vec_dest, NULL);
4697 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4698 new_stmt
4699 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4700 NULL_TREE);
4701 }
4702 else
4703 {
4704 var = make_ssa_name (vec_dest, new_stmt);
4705 gimple_call_set_lhs (new_stmt, var);
4706 }
4707
4708 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4709
4710 if (modifier == NARROW)
4711 {
4712 if ((j & 1) == 0)
4713 {
4714 prev_res = var;
4715 continue;
4716 }
4717 var = permute_vec_elements (prev_res, var,
4718 perm_mask, stmt, gsi);
4719 new_stmt = SSA_NAME_DEF_STMT (var);
4720 }
4721
4722 if (prev_stmt_info == NULL)
4723 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4724 else
4725 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4726 prev_stmt_info = vinfo_for_stmt (new_stmt);
4727 }
4728 return true;
4729 }
4730 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4731 {
4732 gimple_stmt_iterator incr_gsi;
4733 bool insert_after;
4734 gimple incr;
4735 tree offvar;
4736 tree ivstep;
4737 tree running_off;
4738 vec<constructor_elt, va_gc> *v = NULL;
4739 gimple_seq stmts = NULL;
4740 tree stride_base, stride_step, alias_off;
4741
4742 gcc_assert (!nested_in_vect_loop);
4743
4744 stride_base
4745 = fold_build_pointer_plus
4746 (unshare_expr (DR_BASE_ADDRESS (dr)),
4747 size_binop (PLUS_EXPR,
4748 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4749 convert_to_ptrofftype (DR_INIT (dr))));
4750 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4751
4752 /* For a load with loop-invariant (but other than power-of-2)
4753 stride (i.e. not a grouped access) like so:
4754
4755 for (i = 0; i < n; i += stride)
4756 ... = array[i];
4757
4758 we generate a new induction variable and new accesses to
4759 form a new vector (or vectors, depending on ncopies):
4760
4761 for (j = 0; ; j += VF*stride)
4762 tmp1 = array[j];
4763 tmp2 = array[j + stride];
4764 ...
4765 vectemp = {tmp1, tmp2, ...}
4766 */
4767
4768 ivstep = stride_step;
4769 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4770 build_int_cst (TREE_TYPE (ivstep), vf));
4771
4772 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4773
4774 create_iv (stride_base, ivstep, NULL,
4775 loop, &incr_gsi, insert_after,
4776 &offvar, NULL);
4777 incr = gsi_stmt (incr_gsi);
4778 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4779
4780 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4781 if (stmts)
4782 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4783
4784 prev_stmt_info = NULL;
4785 running_off = offvar;
4786 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4787 for (j = 0; j < ncopies; j++)
4788 {
4789 tree vec_inv;
4790
4791 vec_alloc (v, nunits);
4792 for (i = 0; i < nunits; i++)
4793 {
4794 tree newref, newoff;
4795 gimple incr;
4796 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4797 running_off, alias_off);
4798
4799 newref = force_gimple_operand_gsi (gsi, newref, true,
4800 NULL_TREE, true,
4801 GSI_SAME_STMT);
4802 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4803 newoff = copy_ssa_name (running_off, NULL);
4804 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4805 running_off, stride_step);
4806 vect_finish_stmt_generation (stmt, incr, gsi);
4807
4808 running_off = newoff;
4809 }
4810
4811 vec_inv = build_constructor (vectype, v);
4812 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4813 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4814
4815 if (j == 0)
4816 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4817 else
4818 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4819 prev_stmt_info = vinfo_for_stmt (new_stmt);
4820 }
4821 return true;
4822 }
4823
4824 if (grouped_load)
4825 {
4826 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4827 if (slp
4828 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4829 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4830 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4831
4832 /* Check if the chain of loads is already vectorized. */
4833 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4834 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4835 ??? But we can only do so if there is exactly one
4836 as we have no way to get at the rest. Leave the CSE
4837 opportunity alone.
4838 ??? With the group load eventually participating
4839 in multiple different permutations (having multiple
4840 slp nodes which refer to the same group) the CSE
4841 is even wrong code. See PR56270. */
4842 && !slp)
4843 {
4844 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4845 return true;
4846 }
4847 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4848 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4849
4850 /* VEC_NUM is the number of vect stmts to be created for this group. */
4851 if (slp)
4852 {
4853 grouped_load = false;
4854 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4855 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4856 slp_perm = true;
4857 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
4858 }
4859 else
4860 {
4861 vec_num = group_size;
4862 group_gap = 0;
4863 }
4864 }
4865 else
4866 {
4867 first_stmt = stmt;
4868 first_dr = dr;
4869 group_size = vec_num = 1;
4870 group_gap = 0;
4871 }
4872
4873 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4874 gcc_assert (alignment_support_scheme);
4875 /* Targets with load-lane instructions must not require explicit
4876 realignment. */
4877 gcc_assert (!load_lanes_p
4878 || alignment_support_scheme == dr_aligned
4879 || alignment_support_scheme == dr_unaligned_supported);
4880
4881 /* In case the vectorization factor (VF) is bigger than the number
4882 of elements that we can fit in a vectype (nunits), we have to generate
4883 more than one vector stmt - i.e - we need to "unroll" the
4884 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4885 from one copy of the vector stmt to the next, in the field
4886 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4887 stages to find the correct vector defs to be used when vectorizing
4888 stmts that use the defs of the current stmt. The example below
4889 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4890 need to create 4 vectorized stmts):
4891
4892 before vectorization:
4893 RELATED_STMT VEC_STMT
4894 S1: x = memref - -
4895 S2: z = x + 1 - -
4896
4897 step 1: vectorize stmt S1:
4898 We first create the vector stmt VS1_0, and, as usual, record a
4899 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4900 Next, we create the vector stmt VS1_1, and record a pointer to
4901 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4902 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4903 stmts and pointers:
4904 RELATED_STMT VEC_STMT
4905 VS1_0: vx0 = memref0 VS1_1 -
4906 VS1_1: vx1 = memref1 VS1_2 -
4907 VS1_2: vx2 = memref2 VS1_3 -
4908 VS1_3: vx3 = memref3 - -
4909 S1: x = load - VS1_0
4910 S2: z = x + 1 - -
4911
4912 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4913 information recorded in the RELATED_STMT field is used to vectorize
4914 stmt S2. */
4915
4916 /* In case of interleaving (non-unit grouped access):
4917
4918 S1: x2 = &base + 2
4919 S2: x0 = &base
4920 S3: x1 = &base + 1
4921 S4: x3 = &base + 3
4922
4923 Vectorized loads are created in the order of memory accesses
4924 starting from the access of the first stmt of the chain:
4925
4926 VS1: vx0 = &base
4927 VS2: vx1 = &base + vec_size*1
4928 VS3: vx2 = &base + vec_size*2
4929 VS4: vx3 = &base + vec_size*3
4930
4931 Then permutation statements are generated:
4932
4933 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4934 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4935 ...
4936
4937 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4938 (the order of the data-refs in the output of vect_permute_load_chain
4939 corresponds to the order of scalar stmts in the interleaving chain - see
4940 the documentation of vect_permute_load_chain()).
4941 The generation of permutation stmts and recording them in
4942 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4943
4944 In case of both multiple types and interleaving, the vector loads and
4945 permutation stmts above are created for every copy. The result vector
4946 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4947 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4948
4949 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4950 on a target that supports unaligned accesses (dr_unaligned_supported)
4951 we generate the following code:
4952 p = initial_addr;
4953 indx = 0;
4954 loop {
4955 p = p + indx * vectype_size;
4956 vec_dest = *(p);
4957 indx = indx + 1;
4958 }
4959
4960 Otherwise, the data reference is potentially unaligned on a target that
4961 does not support unaligned accesses (dr_explicit_realign_optimized) -
4962 then generate the following code, in which the data in each iteration is
4963 obtained by two vector loads, one from the previous iteration, and one
4964 from the current iteration:
4965 p1 = initial_addr;
4966 msq_init = *(floor(p1))
4967 p2 = initial_addr + VS - 1;
4968 realignment_token = call target_builtin;
4969 indx = 0;
4970 loop {
4971 p2 = p2 + indx * vectype_size
4972 lsq = *(floor(p2))
4973 vec_dest = realign_load (msq, lsq, realignment_token)
4974 indx = indx + 1;
4975 msq = lsq;
4976 } */
4977
4978 /* If the misalignment remains the same throughout the execution of the
4979 loop, we can create the init_addr and permutation mask at the loop
4980 preheader. Otherwise, it needs to be created inside the loop.
4981 This can only occur when vectorizing memory accesses in the inner-loop
4982 nested within an outer-loop that is being vectorized. */
4983
4984 if (nested_in_vect_loop
4985 && (TREE_INT_CST_LOW (DR_STEP (dr))
4986 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4987 {
4988 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4989 compute_in_loop = true;
4990 }
4991
4992 if ((alignment_support_scheme == dr_explicit_realign_optimized
4993 || alignment_support_scheme == dr_explicit_realign)
4994 && !compute_in_loop)
4995 {
4996 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4997 alignment_support_scheme, NULL_TREE,
4998 &at_loop);
4999 if (alignment_support_scheme == dr_explicit_realign_optimized)
5000 {
5001 phi = SSA_NAME_DEF_STMT (msq);
5002 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5003 }
5004 }
5005 else
5006 at_loop = loop;
5007
5008 if (negative)
5009 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
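/* E.g., for V4SI this makes OFFSET = -3, so each vector load starts
three elements below the scalar address; the loaded vector is then
reversed using perm_mask_for_reverse.  */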
5010
5011 if (load_lanes_p)
5012 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5013 else
5014 aggr_type = vectype;
5015
5016 prev_stmt_info = NULL;
5017 for (j = 0; j < ncopies; j++)
5018 {
5019 /* 1. Create the vector or array pointer update chain. */
5020 if (j == 0)
5021 {
5022 bool simd_lane_access_p
5023 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5024 if (simd_lane_access_p
5025 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5026 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5027 && integer_zerop (DR_OFFSET (first_dr))
5028 && integer_zerop (DR_INIT (first_dr))
5029 && alias_sets_conflict_p (get_alias_set (aggr_type),
5030 get_alias_set (DR_REF (first_dr)))
5031 && (alignment_support_scheme == dr_aligned
5032 || alignment_support_scheme == dr_unaligned_supported))
5033 {
5034 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5035 dataref_offset = build_int_cst (reference_alias_ptr_type
5036 (DR_REF (first_dr)), 0);
5037 }
5038 else
5039 dataref_ptr
5040 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5041 offset, &dummy, gsi, &ptr_incr,
5042 simd_lane_access_p, &inv_p);
5043 }
5044 else if (dataref_offset)
5045 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5046 TYPE_SIZE_UNIT (aggr_type));
5047 else
5048 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5049 TYPE_SIZE_UNIT (aggr_type));
5050
5051 if (grouped_load || slp_perm)
5052 dr_chain.create (vec_num);
5053
5054 if (load_lanes_p)
5055 {
5056 tree vec_array;
5057
5058 vec_array = create_vector_array (vectype, vec_num);
5059
5060 /* Emit:
5061 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5062 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5063 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5064 gimple_call_set_lhs (new_stmt, vec_array);
5065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5066
5067 /* Extract each vector into an SSA_NAME. */
5068 for (i = 0; i < vec_num; i++)
5069 {
5070 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5071 vec_array, i);
5072 dr_chain.quick_push (new_temp);
5073 }
5074
5075 /* Record the mapping between SSA_NAMEs and statements. */
5076 vect_record_grouped_load_vectors (stmt, dr_chain);
5077 }
5078 else
5079 {
5080 for (i = 0; i < vec_num; i++)
5081 {
5082 if (i > 0)
5083 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5084 stmt, NULL_TREE);
5085
5086 /* 2. Create the vector-load in the loop. */
5087 switch (alignment_support_scheme)
5088 {
5089 case dr_aligned:
5090 case dr_unaligned_supported:
5091 {
5092 unsigned int align, misalign;
5093
5094 data_ref
5095 = build2 (MEM_REF, vectype, dataref_ptr,
5096 dataref_offset
5097 ? dataref_offset
5098 : build_int_cst (reference_alias_ptr_type
5099 (DR_REF (first_dr)), 0));
5100 align = TYPE_ALIGN_UNIT (vectype);
5101 if (alignment_support_scheme == dr_aligned)
5102 {
5103 gcc_assert (aligned_access_p (first_dr));
5104 misalign = 0;
5105 }
5106 else if (DR_MISALIGNMENT (first_dr) == -1)
5107 {
5108 TREE_TYPE (data_ref)
5109 = build_aligned_type (TREE_TYPE (data_ref),
5110 TYPE_ALIGN (elem_type));
5111 align = TYPE_ALIGN_UNIT (elem_type);
5112 misalign = 0;
5113 }
5114 else
5115 {
5116 TREE_TYPE (data_ref)
5117 = build_aligned_type (TREE_TYPE (data_ref),
5118 TYPE_ALIGN (elem_type));
5119 misalign = DR_MISALIGNMENT (first_dr);
5120 }
5121 if (dataref_offset == NULL_TREE)
5122 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5123 align, misalign);
5124 break;
5125 }
5126 case dr_explicit_realign:
5127 {
5128 tree ptr, bump;
5129 tree vs_minus_1;
5130
5131 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5132
5133 if (compute_in_loop)
5134 msq = vect_setup_realignment (first_stmt, gsi,
5135 &realignment_token,
5136 dr_explicit_realign,
5137 dataref_ptr, NULL);
5138
5139 ptr = copy_ssa_name (dataref_ptr, NULL);
5140 new_stmt = gimple_build_assign_with_ops
5141 (BIT_AND_EXPR, ptr, dataref_ptr,
5142 build_int_cst
5143 (TREE_TYPE (dataref_ptr),
5144 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5145 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5146 data_ref
5147 = build2 (MEM_REF, vectype, ptr,
5148 build_int_cst (reference_alias_ptr_type
5149 (DR_REF (first_dr)), 0));
5150 vec_dest = vect_create_destination_var (scalar_dest,
5151 vectype);
5152 new_stmt = gimple_build_assign (vec_dest, data_ref);
5153 new_temp = make_ssa_name (vec_dest, new_stmt);
5154 gimple_assign_set_lhs (new_stmt, new_temp);
5155 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5156 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5158 msq = new_temp;
5159
5160 bump = size_binop (MULT_EXPR, vs_minus_1,
5161 TYPE_SIZE_UNIT (elem_type));
5162 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5163 new_stmt = gimple_build_assign_with_ops
5164 (BIT_AND_EXPR, NULL_TREE, ptr,
5165 build_int_cst
5166 (TREE_TYPE (ptr),
5167 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5168 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5169 gimple_assign_set_lhs (new_stmt, ptr);
5170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5171 data_ref
5172 = build2 (MEM_REF, vectype, ptr,
5173 build_int_cst (reference_alias_ptr_type
5174 (DR_REF (first_dr)), 0));
5175 break;
5176 }
5177 case dr_explicit_realign_optimized:
5178 new_temp = copy_ssa_name (dataref_ptr, NULL);
5179 new_stmt = gimple_build_assign_with_ops
5180 (BIT_AND_EXPR, new_temp, dataref_ptr,
5181 build_int_cst
5182 (TREE_TYPE (dataref_ptr),
5183 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5185 data_ref
5186 = build2 (MEM_REF, vectype, new_temp,
5187 build_int_cst (reference_alias_ptr_type
5188 (DR_REF (first_dr)), 0));
5189 break;
5190 default:
5191 gcc_unreachable ();
5192 }
5193 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5194 new_stmt = gimple_build_assign (vec_dest, data_ref);
5195 new_temp = make_ssa_name (vec_dest, new_stmt);
5196 gimple_assign_set_lhs (new_stmt, new_temp);
5197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5198
5199 /* 3. Handle explicit realignment if necessary/supported.
5200 Create in loop:
5201 vec_dest = realign_load (msq, lsq, realignment_token) */
5202 if (alignment_support_scheme == dr_explicit_realign_optimized
5203 || alignment_support_scheme == dr_explicit_realign)
5204 {
5205 lsq = gimple_assign_lhs (new_stmt);
5206 if (!realignment_token)
5207 realignment_token = dataref_ptr;
5208 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5209 new_stmt
5210 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5211 vec_dest, msq, lsq,
5212 realignment_token);
5213 new_temp = make_ssa_name (vec_dest, new_stmt);
5214 gimple_assign_set_lhs (new_stmt, new_temp);
5215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5216
5217 if (alignment_support_scheme == dr_explicit_realign_optimized)
5218 {
5219 gcc_assert (phi);
5220 if (i == vec_num - 1 && j == ncopies - 1)
5221 add_phi_arg (phi, lsq,
5222 loop_latch_edge (containing_loop),
5223 UNKNOWN_LOCATION);
5224 msq = lsq;
5225 }
5226 }
5227
5228 /* 4. Handle invariant-load. */
5229 if (inv_p && !bb_vinfo)
5230 {
5231 gimple_stmt_iterator gsi2 = *gsi;
5232 gcc_assert (!grouped_load);
5233 gsi_next (&gsi2);
5234 new_temp = vect_init_vector (stmt, scalar_dest,
5235 vectype, &gsi2);
5236 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5237 }
5238
5239 if (negative)
5240 {
5241 tree perm_mask = perm_mask_for_reverse (vectype);
5242 new_temp = permute_vec_elements (new_temp, new_temp,
5243 perm_mask, stmt, gsi);
5244 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5245 }
5246
5247 /* Collect vector loads and later create their permutation in
5248 vect_transform_grouped_load (). */
5249 if (grouped_load || slp_perm)
5250 dr_chain.quick_push (new_temp);
5251
5252 /* Store vector loads in the corresponding SLP_NODE. */
5253 if (slp && !slp_perm)
5254 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5255 }
5256 /* Bump the vector pointer to account for a gap. */
5257 if (slp && group_gap != 0)
5258 {
5259 tree bump = size_binop (MULT_EXPR,
5260 TYPE_SIZE_UNIT (elem_type),
5261 size_int (group_gap));
5262 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5263 stmt, bump);
5264 }
5265 }
5266
5267 if (slp && !slp_perm)
5268 continue;
5269
5270 if (slp_perm)
5271 {
5272 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5273 slp_node_instance, false))
5274 {
5275 dr_chain.release ();
5276 return false;
5277 }
5278 }
5279 else
5280 {
5281 if (grouped_load)
5282 {
5283 if (!load_lanes_p)
5284 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5285 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5286 }
5287 else
5288 {
5289 if (j == 0)
5290 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5291 else
5292 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5293 prev_stmt_info = vinfo_for_stmt (new_stmt);
5294 }
5295 }
5296 dr_chain.release ();
5297 }
5298
5299 return true;
5300 }
5301
5302 /* Function vect_is_simple_cond.
5303
5304 Input:
5305 LOOP - the loop that is being vectorized.
5306 COND - Condition that is checked for simple use.
5307
5308 Output:
5309 *COMP_VECTYPE - the vector type for the comparison.
5310
5311 Returns whether a COND can be vectorized. Checks whether
5312 condition operands are supportable using vect_is_simple_use. */
5313
5314 static bool
5315 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5316 bb_vec_info bb_vinfo, tree *comp_vectype)
5317 {
5318 tree lhs, rhs;
5319 tree def;
5320 enum vect_def_type dt;
5321 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5322
5323 if (!COMPARISON_CLASS_P (cond))
5324 return false;
5325
5326 lhs = TREE_OPERAND (cond, 0);
5327 rhs = TREE_OPERAND (cond, 1);
5328
5329 if (TREE_CODE (lhs) == SSA_NAME)
5330 {
5331 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5332 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5333 &lhs_def_stmt, &def, &dt, &vectype1))
5334 return false;
5335 }
5336 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5337 && TREE_CODE (lhs) != FIXED_CST)
5338 return false;
5339
5340 if (TREE_CODE (rhs) == SSA_NAME)
5341 {
5342 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5343 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5344 &rhs_def_stmt, &def, &dt, &vectype2))
5345 return false;
5346 }
5347 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5348 && TREE_CODE (rhs) != FIXED_CST)
5349 return false;
5350
5351 *comp_vectype = vectype1 ? vectype1 : vectype2;
5352 return true;
5353 }
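
/* E.g., for a condition a_1 < b_2 where both operands are SSA names
with V4SI vector types, *COMP_VECTYPE is set to V4SI.  */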
5354
5355 /* vectorizable_condition.
5356
5357 Check if STMT is conditional modify expression that can be vectorized.
5358 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5359 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5360 at GSI.
5361
5362 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5363 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5364 else clause if it is 2).
5365
5366 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5367
5368 bool
5369 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5370 gimple *vec_stmt, tree reduc_def, int reduc_index,
5371 slp_tree slp_node)
5372 {
5373 tree scalar_dest = NULL_TREE;
5374 tree vec_dest = NULL_TREE;
5375 tree cond_expr, then_clause, else_clause;
5376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5377 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5378 tree comp_vectype = NULL_TREE;
5379 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5380 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5381 tree vec_compare, vec_cond_expr;
5382 tree new_temp;
5383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5384 tree def;
5385 enum vect_def_type dt, dts[4];
5386 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5387 int ncopies;
5388 enum tree_code code;
5389 stmt_vec_info prev_stmt_info = NULL;
5390 int i, j;
5391 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5392 vec<tree> vec_oprnds0 = vNULL;
5393 vec<tree> vec_oprnds1 = vNULL;
5394 vec<tree> vec_oprnds2 = vNULL;
5395 vec<tree> vec_oprnds3 = vNULL;
5396 tree vec_cmp_type;
5397
5398 if (slp_node || PURE_SLP_STMT (stmt_info))
5399 ncopies = 1;
5400 else
5401 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5402
5403 gcc_assert (ncopies >= 1);
5404 if (reduc_index && ncopies > 1)
5405 return false; /* FORNOW */
5406
5407 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5408 return false;
5409
5410 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5411 return false;
5412
5413 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5414 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5415 && reduc_def))
5416 return false;
5417
5418 /* FORNOW: not yet supported. */
5419 if (STMT_VINFO_LIVE_P (stmt_info))
5420 {
5421 if (dump_enabled_p ())
5422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5423 "value used after loop.");
5424 return false;
5425 }
5426
5427   /* Is this a vectorizable conditional operation?  */
5428 if (!is_gimple_assign (stmt))
5429 return false;
5430
5431 code = gimple_assign_rhs_code (stmt);
5432
5433 if (code != COND_EXPR)
5434 return false;
5435
5436 cond_expr = gimple_assign_rhs1 (stmt);
5437 then_clause = gimple_assign_rhs2 (stmt);
5438 else_clause = gimple_assign_rhs3 (stmt);
5439
5440 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5441 &comp_vectype)
5442 || !comp_vectype)
5443 return false;
5444
5445 if (TREE_CODE (then_clause) == SSA_NAME)
5446 {
5447 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5448 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5449 &then_def_stmt, &def, &dt))
5450 return false;
5451 }
5452 else if (TREE_CODE (then_clause) != INTEGER_CST
5453 && TREE_CODE (then_clause) != REAL_CST
5454 && TREE_CODE (then_clause) != FIXED_CST)
5455 return false;
5456
5457 if (TREE_CODE (else_clause) == SSA_NAME)
5458 {
5459 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5460 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5461 &else_def_stmt, &def, &dt))
5462 return false;
5463 }
5464 else if (TREE_CODE (else_clause) != INTEGER_CST
5465 && TREE_CODE (else_clause) != REAL_CST
5466 && TREE_CODE (else_clause) != FIXED_CST)
5467 return false;
5468
5469 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5470   /* The result of a vector comparison should be of signed integer type.  */
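  /* E.g. (illustrative): for a V4SF VECTYPE, PREC is 32 and the
     comparison result uses a V4SI mask type (four 32-bit signed
     elements).  */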
5471 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5472 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5473 if (vec_cmp_type == NULL_TREE)
5474 return false;
5475
5476 if (!vec_stmt)
5477 {
5478 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5479 return expand_vec_cond_expr_p (vectype, comp_vectype);
5480 }
5481
5482 /* Transform. */
5483
5484 if (!slp_node)
5485 {
5486 vec_oprnds0.create (1);
5487 vec_oprnds1.create (1);
5488 vec_oprnds2.create (1);
5489 vec_oprnds3.create (1);
5490 }
5491
5492 /* Handle def. */
5493 scalar_dest = gimple_assign_lhs (stmt);
5494 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5495
5496 /* Handle cond expr. */
5497 for (j = 0; j < ncopies; j++)
5498 {
5499 gimple new_stmt = NULL;
5500 if (j == 0)
5501 {
5502 if (slp_node)
5503 {
5504 vec<tree> ops;
5505 ops.create (4);
5506 vec<vec<tree> > vec_defs;
5507
5508 vec_defs.create (4);
5509 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5510 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5511 ops.safe_push (then_clause);
5512 ops.safe_push (else_clause);
5513 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5514 vec_oprnds3 = vec_defs.pop ();
5515 vec_oprnds2 = vec_defs.pop ();
5516 vec_oprnds1 = vec_defs.pop ();
5517 vec_oprnds0 = vec_defs.pop ();
5518
5519 ops.release ();
5520 vec_defs.release ();
5521 }
5522 else
5523 {
5524 gimple gtemp;
5525 vec_cond_lhs =
5526 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5527 stmt, NULL);
5528 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5529 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5530
5531 vec_cond_rhs =
5532 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5533 stmt, NULL);
5534 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5535 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5536 if (reduc_index == 1)
5537 vec_then_clause = reduc_def;
5538 else
5539 {
5540 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5541 stmt, NULL);
5542 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5543 NULL, &gtemp, &def, &dts[2]);
5544 }
5545 if (reduc_index == 2)
5546 vec_else_clause = reduc_def;
5547 else
5548 {
5549 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5550 stmt, NULL);
5551 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5552 NULL, &gtemp, &def, &dts[3]);
5553 }
5554 }
5555 }
5556 else
5557 {
5558 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5559 vec_oprnds0.pop ());
5560 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5561 vec_oprnds1.pop ());
5562 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5563 vec_oprnds2.pop ());
5564 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5565 vec_oprnds3.pop ());
5566 }
5567
5568 if (!slp_node)
5569 {
5570 vec_oprnds0.quick_push (vec_cond_lhs);
5571 vec_oprnds1.quick_push (vec_cond_rhs);
5572 vec_oprnds2.quick_push (vec_then_clause);
5573 vec_oprnds3.quick_push (vec_else_clause);
5574 }
5575
5576 /* Arguments are ready. Create the new vector stmt. */
5577 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5578 {
5579 vec_cond_rhs = vec_oprnds1[i];
5580 vec_then_clause = vec_oprnds2[i];
5581 vec_else_clause = vec_oprnds3[i];
5582
5583 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5584 vec_cond_lhs, vec_cond_rhs);
5585 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5586 vec_compare, vec_then_clause, vec_else_clause);
5587
5588 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5589 new_temp = make_ssa_name (vec_dest, new_stmt);
5590 gimple_assign_set_lhs (new_stmt, new_temp);
5591 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5592 if (slp_node)
5593 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5594 }
5595
5596 if (slp_node)
5597 continue;
5598
5599 if (j == 0)
5600 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5601 else
5602 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5603
5604 prev_stmt_info = vinfo_for_stmt (new_stmt);
5605 }
5606
5607 vec_oprnds0.release ();
5608 vec_oprnds1.release ();
5609 vec_oprnds2.release ();
5610 vec_oprnds3.release ();
5611
5612 return true;
5613 }
5614
5615
5616 /* Make sure the statement is vectorizable. */
5617
5618 bool
5619 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5620 {
5621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5622 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5623 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5624 bool ok;
5625 tree scalar_type, vectype;
5626 gimple pattern_stmt;
5627 gimple_seq pattern_def_seq;
5628
5629 if (dump_enabled_p ())
5630 {
5631 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5632 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5633 }
5634
5635 if (gimple_has_volatile_ops (stmt))
5636 {
5637 if (dump_enabled_p ())
5638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5639 "not vectorized: stmt has volatile operands");
5640
5641 return false;
5642 }
5643
5644 /* Skip stmts that do not need to be vectorized. In loops this is expected
5645 to include:
5646 - the COND_EXPR which is the loop exit condition
5647 - any LABEL_EXPRs in the loop
5648 - computations that are used only for array indexing or loop control.
5649 In basic blocks we only analyze statements that are a part of some SLP
5650      instance; therefore, all the statements are relevant.
5651
5652      A pattern statement needs to be analyzed instead of the original statement
5653      if the original statement is not relevant.  Otherwise, we analyze both
5654      statements.  In basic blocks we are called from some SLP instance
5655      traversal, so we do not analyze pattern stmts instead; the pattern
5656      stmts are already part of the SLP instance.  */
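/* E.g. (an illustrative sketch; names are hypothetical): the scalar
   computation

     S1: a' = (int) a_short;
     S2: b' = (int) b_short;
     S3: res_int = a' * b';

   may have been replaced by the pattern statement

     S3': res_int = WIDEN_MULT_EXPR <a_short, b_short>;

   in which case S3' is analyzed here instead of, or in addition to, S3.  */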
5657
5658 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5659 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5660 && !STMT_VINFO_LIVE_P (stmt_info))
5661 {
5662 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5663 && pattern_stmt
5664 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5665 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5666 {
5667 /* Analyze PATTERN_STMT instead of the original stmt. */
5668 stmt = pattern_stmt;
5669 stmt_info = vinfo_for_stmt (pattern_stmt);
5670 if (dump_enabled_p ())
5671 {
5672 dump_printf_loc (MSG_NOTE, vect_location,
5673 "==> examining pattern statement: ");
5674 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5675 }
5676 }
5677 else
5678 {
5679 if (dump_enabled_p ())
5680 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5681
5682 return true;
5683 }
5684 }
5685 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5686 && node == NULL
5687 && pattern_stmt
5688 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5689 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5690 {
5691 /* Analyze PATTERN_STMT too. */
5692 if (dump_enabled_p ())
5693 {
5694 dump_printf_loc (MSG_NOTE, vect_location,
5695 "==> examining pattern statement: ");
5696 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5697 }
5698
5699 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5700 return false;
5701 }
5702
5703 if (is_pattern_stmt_p (stmt_info)
5704 && node == NULL
5705 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5706 {
5707 gimple_stmt_iterator si;
5708
5709 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5710 {
5711 gimple pattern_def_stmt = gsi_stmt (si);
5712 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5713 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5714 {
5715             /* Analyze PATTERN_DEF_STMT, a def stmt of the pattern stmt STMT.  */
5716 if (dump_enabled_p ())
5717 {
5718 dump_printf_loc (MSG_NOTE, vect_location,
5719 "==> examining pattern def statement: ");
5720 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5721 }
5722
5723 if (!vect_analyze_stmt (pattern_def_stmt,
5724 need_to_vectorize, node))
5725 return false;
5726 }
5727 }
5728 }
5729
5730 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5731 {
5732 case vect_internal_def:
5733 break;
5734
5735 case vect_reduction_def:
5736 case vect_nested_cycle:
5737 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5738 || relevance == vect_used_in_outer_by_reduction
5739 || relevance == vect_unused_in_scope));
5740 break;
5741
5742 case vect_induction_def:
5743 case vect_constant_def:
5744 case vect_external_def:
5745 case vect_unknown_def_type:
5746 default:
5747 gcc_unreachable ();
5748 }
5749
5750 if (bb_vinfo)
5751 {
5752 gcc_assert (PURE_SLP_STMT (stmt_info));
5753
5754 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5755 if (dump_enabled_p ())
5756 {
5757 dump_printf_loc (MSG_NOTE, vect_location,
5758 "get vectype for scalar type: ");
5759 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5760 }
5761
5762 vectype = get_vectype_for_scalar_type (scalar_type);
5763 if (!vectype)
5764 {
5765 if (dump_enabled_p ())
5766 {
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5768 "not SLPed: unsupported data-type ");
5769 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5770 scalar_type);
5771 }
5772 return false;
5773 }
5774
5775 if (dump_enabled_p ())
5776 {
5777 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5778 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5779 }
5780
5781 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5782 }
5783
5784 if (STMT_VINFO_RELEVANT_P (stmt_info))
5785 {
5786 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5787 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5788 *need_to_vectorize = true;
5789 }
5790
5791 ok = true;
5792 if (!bb_vinfo
5793 && (STMT_VINFO_RELEVANT_P (stmt_info)
5794 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5795 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5796 || vectorizable_shift (stmt, NULL, NULL, NULL)
5797 || vectorizable_operation (stmt, NULL, NULL, NULL)
5798 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5799 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5800 || vectorizable_call (stmt, NULL, NULL, NULL)
5801 || vectorizable_store (stmt, NULL, NULL, NULL)
5802 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5803 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5804 else
5805 {
5806 if (bb_vinfo)
5807 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5808 || vectorizable_shift (stmt, NULL, NULL, node)
5809 || vectorizable_operation (stmt, NULL, NULL, node)
5810 || vectorizable_assignment (stmt, NULL, NULL, node)
5811 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5812 || vectorizable_call (stmt, NULL, NULL, node)
5813 || vectorizable_store (stmt, NULL, NULL, node)
5814 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5815 }
5816
5817 if (!ok)
5818 {
5819 if (dump_enabled_p ())
5820 {
5821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5822 "not vectorized: relevant stmt not ");
5823 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5824 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5825 }
5826
5827 return false;
5828 }
5829
5830 if (bb_vinfo)
5831 return true;
5832
5833   /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5834      need extra handling, except for vectorizable reductions.  */
5835 if (STMT_VINFO_LIVE_P (stmt_info)
5836 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5837 ok = vectorizable_live_operation (stmt, NULL, NULL);
5838
5839 if (!ok)
5840 {
5841 if (dump_enabled_p ())
5842 {
5843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5844 "not vectorized: live stmt not ");
5845 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5846 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5847 }
5848
5849 return false;
5850 }
5851
5852 return true;
5853 }
5854
5855
5856 /* Function vect_transform_stmt.
5857
5858    Create a vectorized stmt to replace STMT, and insert it at GSI.  */
5859
5860 bool
5861 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5862 bool *grouped_store, slp_tree slp_node,
5863 slp_instance slp_node_instance)
5864 {
5865 bool is_store = false;
5866 gimple vec_stmt = NULL;
5867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5868 bool done;
5869
5870 switch (STMT_VINFO_TYPE (stmt_info))
5871 {
5872 case type_demotion_vec_info_type:
5873 case type_promotion_vec_info_type:
5874 case type_conversion_vec_info_type:
5875 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5876 gcc_assert (done);
5877 break;
5878
5879 case induc_vec_info_type:
5880 gcc_assert (!slp_node);
5881 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5882 gcc_assert (done);
5883 break;
5884
5885 case shift_vec_info_type:
5886 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5887 gcc_assert (done);
5888 break;
5889
5890 case op_vec_info_type:
5891 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5892 gcc_assert (done);
5893 break;
5894
5895 case assignment_vec_info_type:
5896 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5897 gcc_assert (done);
5898 break;
5899
5900 case load_vec_info_type:
5901 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5902 slp_node_instance);
5903 gcc_assert (done);
5904 break;
5905
5906 case store_vec_info_type:
5907 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5908 gcc_assert (done);
5909 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5910 {
5911           /* In case of interleaving, the whole chain is vectorized when the
5912              last store in the chain is reached.  Store stmts before the last
5913              one are skipped, and their stmt_vec_info shouldn't be freed
5914              meanwhile.  */
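	  /* E.g. (illustrative): for an interleaved chain of stores
	     {S1, S2, S3}, reaching S1 and S2 only records the group;
	     the vector stores are emitted when S3, the last store in
	     the chain, is transformed.  */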
5915 *grouped_store = true;
5916 if (STMT_VINFO_VEC_STMT (stmt_info))
5917 is_store = true;
5918 }
5919 else
5920 is_store = true;
5921 break;
5922
5923 case condition_vec_info_type:
5924 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5925 gcc_assert (done);
5926 break;
5927
5928 case call_vec_info_type:
5929 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5930 stmt = gsi_stmt (*gsi);
5931 break;
5932
5933 case reduc_vec_info_type:
5934 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5935 gcc_assert (done);
5936 break;
5937
5938 default:
5939 if (!STMT_VINFO_LIVE_P (stmt_info))
5940 {
5941 if (dump_enabled_p ())
5942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5943 "stmt not supported.");
5944 gcc_unreachable ();
5945 }
5946 }
5947
5948 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5949 is being vectorized, but outside the immediately enclosing loop. */
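  /* E.g. (an illustrative sketch): in the nest

       for (i = 0; i < N; i++)      <-- outer-loop being vectorized
         {
           s = 0;
           for (j = 0; j < M; j++)  <-- inner-loop
             s += a[i][j];
           b[i] = s;                <-- inner-loop DEF used in outer-loop
         }

     the vectorized def of s is recorded at the inner-loop exit phi so
     that it is found when vectorizing  b[i] = s.  */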
5950 if (vec_stmt
5951 && STMT_VINFO_LOOP_VINFO (stmt_info)
5952 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5953 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5954 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5955 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5956 || STMT_VINFO_RELEVANT (stmt_info) ==
5957 vect_used_in_outer_by_reduction))
5958 {
5959 struct loop *innerloop = LOOP_VINFO_LOOP (
5960 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5961 imm_use_iterator imm_iter;
5962 use_operand_p use_p;
5963 tree scalar_dest;
5964 gimple exit_phi;
5965
5966 if (dump_enabled_p ())
5967 dump_printf_loc (MSG_NOTE, vect_location,
5968 "Record the vdef for outer-loop vectorization.");
5969
5970       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5971 (to be used when vectorizing outer-loop stmts that use the DEF of
5972 STMT). */
5973 if (gimple_code (stmt) == GIMPLE_PHI)
5974 scalar_dest = PHI_RESULT (stmt);
5975 else
5976 scalar_dest = gimple_assign_lhs (stmt);
5977
5978 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5979 {
5980 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5981 {
5982 exit_phi = USE_STMT (use_p);
5983 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5984 }
5985 }
5986 }
5987
5988 /* Handle stmts whose DEF is used outside the loop-nest that is
5989 being vectorized. */
5990 if (STMT_VINFO_LIVE_P (stmt_info)
5991 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5992 {
5993 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5994 gcc_assert (done);
5995 }
5996
5997 if (vec_stmt)
5998 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5999
6000 return is_store;
6001 }
6002
6003
6004 /* Remove a group of stores (for SLP or interleaving) and free their
6005    stmt_vec_info.  */
6006
6007 void
6008 vect_remove_stores (gimple first_stmt)
6009 {
6010 gimple next = first_stmt;
6011 gimple tmp;
6012 gimple_stmt_iterator next_si;
6013
6014 while (next)
6015 {
6016 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6017
6018 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6019 if (is_pattern_stmt_p (stmt_info))
6020 next = STMT_VINFO_RELATED_STMT (stmt_info);
6021 /* Free the attached stmt_vec_info and remove the stmt. */
6022 next_si = gsi_for_stmt (next);
6023 unlink_stmt_vdef (next);
6024 gsi_remove (&next_si, true);
6025 release_defs (next);
6026 free_stmt_vec_info (next);
6027 next = tmp;
6028 }
6029 }
6030
6031
6032 /* Function new_stmt_vec_info.
6033
6034 Create and initialize a new stmt_vec_info struct for STMT. */
6035
6036 stmt_vec_info
6037 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
6038 bb_vec_info bb_vinfo)
6039 {
6040 stmt_vec_info res;
6041 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6042
6043 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6044 STMT_VINFO_STMT (res) = stmt;
6045 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
6046 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
6047 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6048 STMT_VINFO_LIVE_P (res) = false;
6049 STMT_VINFO_VECTYPE (res) = NULL;
6050 STMT_VINFO_VEC_STMT (res) = NULL;
6051 STMT_VINFO_VECTORIZABLE (res) = true;
6052 STMT_VINFO_IN_PATTERN_P (res) = false;
6053 STMT_VINFO_RELATED_STMT (res) = NULL;
6054 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6055 STMT_VINFO_DATA_REF (res) = NULL;
6056
6057 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6058 STMT_VINFO_DR_OFFSET (res) = NULL;
6059 STMT_VINFO_DR_INIT (res) = NULL;
6060 STMT_VINFO_DR_STEP (res) = NULL;
6061 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6062
6063 if (gimple_code (stmt) == GIMPLE_PHI
6064 && is_loop_header_bb_p (gimple_bb (stmt)))
6065 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6066 else
6067 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6068
6069 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
6070 STMT_SLP_TYPE (res) = loop_vect;
6071 GROUP_FIRST_ELEMENT (res) = NULL;
6072 GROUP_NEXT_ELEMENT (res) = NULL;
6073 GROUP_SIZE (res) = 0;
6074 GROUP_STORE_COUNT (res) = 0;
6075 GROUP_GAP (res) = 0;
6076 GROUP_SAME_DR_STMT (res) = NULL;
6077
6078 return res;
6079 }
6080
6081
6082 /* Create a vector for stmt_vec_info.  */
6083
6084 void
6085 init_stmt_vec_info_vec (void)
6086 {
6087 gcc_assert (!stmt_vec_info_vec.exists ());
6088 stmt_vec_info_vec.create (50);
6089 }
6090
6091
6092 /* Free the vector of stmt_vec_info.  */
6093
6094 void
6095 free_stmt_vec_info_vec (void)
6096 {
6097 unsigned int i;
6098 vec_void_p info;
6099 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6100 if (info != NULL)
6101 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6102 gcc_assert (stmt_vec_info_vec.exists ());
6103 stmt_vec_info_vec.release ();
6104 }
6105
6106
6107 /* Free stmt vectorization related info. */
6108
6109 void
6110 free_stmt_vec_info (gimple stmt)
6111 {
6112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6113
6114 if (!stmt_info)
6115 return;
6116
6117 /* Check if this statement has a related "pattern stmt"
6118 (introduced by the vectorizer during the pattern recognition
6119 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6120 too. */
6121 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6122 {
6123 stmt_vec_info patt_info
6124 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6125 if (patt_info)
6126 {
6127 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6128 if (seq)
6129 {
6130 gimple_stmt_iterator si;
6131 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6132 free_stmt_vec_info (gsi_stmt (si));
6133 }
6134 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6135 }
6136 }
6137
6138 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6139 set_vinfo_for_stmt (stmt, NULL);
6140 free (stmt_info);
6141 }
6142
6143
6144 /* Function get_vectype_for_scalar_type_and_size.
6145
6146 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6147 by the target. */
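/* E.g. (illustrative, assuming a target with 4-byte int and a 16-byte
   vector mode): for SCALAR_TYPE int and SIZE 16 this returns V4SI;
   with SIZE 0 the number of units is derived from the target's
   preferred SIMD mode instead.  */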
6148
6149 static tree
6150 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6151 {
6152 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6153 enum machine_mode simd_mode;
6154 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6155 int nunits;
6156 tree vectype;
6157
6158 if (nbytes == 0)
6159 return NULL_TREE;
6160
6161 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6162 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6163 return NULL_TREE;
6164
6165 /* For vector types of elements whose mode precision doesn't
6166      match their type's precision we use an element type of mode
6167 precision. The vectorization routines will have to make sure
6168 they support the proper result truncation/extension.
6169 We also make sure to build vector types with INTEGER_TYPE
6170 component type only. */
6171 if (INTEGRAL_TYPE_P (scalar_type)
6172 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6173 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6174 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6175 TYPE_UNSIGNED (scalar_type));
6176
6177 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6178 When the component mode passes the above test simply use a type
6179 corresponding to that mode. The theory is that any use that
6180 would cause problems with this will disable vectorization anyway. */
6181 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6182 && !INTEGRAL_TYPE_P (scalar_type)
6183 && !POINTER_TYPE_P (scalar_type))
6184 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6185
6186 /* We can't build a vector type of elements with alignment bigger than
6187 their size. */
6188 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6189 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6190 TYPE_UNSIGNED (scalar_type));
6191
6192   /* If we fell back to using the mode, fail if there was
6193      no scalar type for it.  */
6194 if (scalar_type == NULL_TREE)
6195 return NULL_TREE;
6196
6197 /* If no size was supplied use the mode the target prefers. Otherwise
6198      look up a vector mode of the specified size.  */
6199 if (size == 0)
6200 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6201 else
6202 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6203 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6204 if (nunits <= 1)
6205 return NULL_TREE;
6206
6207 vectype = build_vector_type (scalar_type, nunits);
6208
6209 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6210 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6211 return NULL_TREE;
6212
6213 return vectype;
6214 }
6215
6216 unsigned int current_vector_size;
6217
6218 /* Function get_vectype_for_scalar_type.
6219
6220 Returns the vector type corresponding to SCALAR_TYPE as supported
6221 by the target. */
6222
6223 tree
6224 get_vectype_for_scalar_type (tree scalar_type)
6225 {
6226 tree vectype;
6227 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6228 current_vector_size);
6229 if (vectype
6230 && current_vector_size == 0)
6231 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6232 return vectype;
6233 }
6234
6235 /* Function get_same_sized_vectype
6236
6237    Returns a vector type corresponding to SCALAR_TYPE with the same size
6238    as VECTOR_TYPE, if supported by the target.  */
6239
6240 tree
6241 get_same_sized_vectype (tree scalar_type, tree vector_type)
6242 {
6243 return get_vectype_for_scalar_type_and_size
6244 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6245 }
6246
6247 /* Function vect_is_simple_use.
6248
6249 Input:
6250 LOOP_VINFO - the vect info of the loop that is being vectorized.
6251 BB_VINFO - the vect info of the basic block that is being vectorized.
6252 OPERAND - operand of STMT in the loop or bb.
6253    DEF_STMT, DEF, DT - outputs: the defining stmt, definition and definition type of OPERAND, in case OPERAND is an SSA_NAME.
6254
6255 Returns whether a stmt with OPERAND can be vectorized.
6256 For loops, supportable operands are constants, loop invariants, and operands
6257 that are defined by the current iteration of the loop. Unsupportable
6258 operands are those that are defined by a previous iteration of the loop (as
6259 is the case in reduction/induction computations).
6260 For basic blocks, supportable operands are constants and bb invariants.
6261 For now, operands defined outside the basic block are not supported. */
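/* E.g. (an illustrative sketch): in

     for (i = 0; i < N; i++)
       a[i] = x_1 + b[i];

   b[i] is defined by the current iteration and x_1 is a loop
   invariant, so both are simple uses; a use of a value defined by a
   previous iteration (as in reduction/induction computations) is not
   a simple use.  */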
6262
6263 bool
6264 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6265 bb_vec_info bb_vinfo, gimple *def_stmt,
6266 tree *def, enum vect_def_type *dt)
6267 {
6268 basic_block bb;
6269 stmt_vec_info stmt_vinfo;
6270 struct loop *loop = NULL;
6271
6272 if (loop_vinfo)
6273 loop = LOOP_VINFO_LOOP (loop_vinfo);
6274
6275 *def_stmt = NULL;
6276 *def = NULL_TREE;
6277
6278 if (dump_enabled_p ())
6279 {
6280 dump_printf_loc (MSG_NOTE, vect_location,
6281 "vect_is_simple_use: operand ");
6282 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6283 }
6284
6285 if (CONSTANT_CLASS_P (operand))
6286 {
6287 *dt = vect_constant_def;
6288 return true;
6289 }
6290
6291 if (is_gimple_min_invariant (operand))
6292 {
6293 *def = operand;
6294 *dt = vect_external_def;
6295 return true;
6296 }
6297
6298 if (TREE_CODE (operand) == PAREN_EXPR)
6299 {
6300 if (dump_enabled_p ())
6301 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6302 operand = TREE_OPERAND (operand, 0);
6303 }
6304
6305 if (TREE_CODE (operand) != SSA_NAME)
6306 {
6307 if (dump_enabled_p ())
6308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6309 "not ssa-name.");
6310 return false;
6311 }
6312
6313 *def_stmt = SSA_NAME_DEF_STMT (operand);
6314 if (*def_stmt == NULL)
6315 {
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6318 "no def_stmt.");
6319 return false;
6320 }
6321
6322 if (dump_enabled_p ())
6323 {
6324 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6325 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6326 }
6327
6328 /* Empty stmt is expected only in case of a function argument.
6329      (Otherwise we expect a phi_node, a GIMPLE_ASSIGN or a GIMPLE_CALL).  */
6330 if (gimple_nop_p (*def_stmt))
6331 {
6332 *def = operand;
6333 *dt = vect_external_def;
6334 return true;
6335 }
6336
6337 bb = gimple_bb (*def_stmt);
6338
6339 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6340 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6341 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6342 *dt = vect_external_def;
6343 else
6344 {
6345 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6346 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6347 }
6348
6349 if (*dt == vect_unknown_def_type
6350 || (stmt
6351 && *dt == vect_double_reduction_def
6352 && gimple_code (stmt) != GIMPLE_PHI))
6353 {
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6356 "Unsupported pattern.");
6357 return false;
6358 }
6359
6360 if (dump_enabled_p ())
6361 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6362
6363 switch (gimple_code (*def_stmt))
6364 {
6365 case GIMPLE_PHI:
6366 *def = gimple_phi_result (*def_stmt);
6367 break;
6368
6369 case GIMPLE_ASSIGN:
6370 *def = gimple_assign_lhs (*def_stmt);
6371 break;
6372
6373 case GIMPLE_CALL:
6374 *def = gimple_call_lhs (*def_stmt);
6375 if (*def != NULL)
6376 break;
6377 /* FALLTHRU */
6378 default:
6379 if (dump_enabled_p ())
6380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6381 "unsupported defining stmt: ");
6382 return false;
6383 }
6384
6385 return true;
6386 }
6387
6388 /* Function vect_is_simple_use_1.
6389
6390    Same as vect_is_simple_use but also determines the vector operand
6391    type of OPERAND and stores it to *VECTYPE.  If the definition of
6392    OPERAND is vect_uninitialized_def, vect_constant_def or
6393    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6394    is responsible for computing the best suited vector type for the
6395 scalar operand. */
6396
6397 bool
6398 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6399 bb_vec_info bb_vinfo, gimple *def_stmt,
6400 tree *def, enum vect_def_type *dt, tree *vectype)
6401 {
6402 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6403 def, dt))
6404 return false;
6405
6406 /* Now get a vector type if the def is internal, otherwise supply
6407 NULL_TREE and leave it up to the caller to figure out a proper
6408 type for the use stmt. */
6409 if (*dt == vect_internal_def
6410 || *dt == vect_induction_def
6411 || *dt == vect_reduction_def
6412 || *dt == vect_double_reduction_def
6413 || *dt == vect_nested_cycle)
6414 {
6415 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6416
6417 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6418 && !STMT_VINFO_RELEVANT (stmt_info)
6419 && !STMT_VINFO_LIVE_P (stmt_info))
6420 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6421
6422 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6423 gcc_assert (*vectype != NULL_TREE);
6424 }
6425 else if (*dt == vect_uninitialized_def
6426 || *dt == vect_constant_def
6427 || *dt == vect_external_def)
6428 *vectype = NULL_TREE;
6429 else
6430 gcc_unreachable ();
6431
6432 return true;
6433 }
6434
6435
6436 /* Function supportable_widening_operation
6437
6438 Check whether an operation represented by the code CODE is a
6439 widening operation that is supported by the target platform in
6440 vector form (i.e., when operating on arguments of type VECTYPE_IN
6441 producing a result of type VECTYPE_OUT).
6442
6443    Widening operations we currently support are NOP (CONVERT), FLOAT,
6444    WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations are supported
6445 by the target platform either directly (via vector tree-codes), or via
6446 target builtins.
6447
6448 Output:
6449 - CODE1 and CODE2 are codes of vector operations to be used when
6450 vectorizing the operation, if available.
6451 - MULTI_STEP_CVT determines the number of required intermediate steps in
6452 case of multi-step conversion (like char->short->int - in that case
6453 MULTI_STEP_CVT will be 1).
6454 - INTERM_TYPES contains the intermediate type required to perform the
6455 widening operation (short in the above example). */
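/* E.g. (an illustrative sketch, assuming 128-bit V16QI/V8HI/V4SI
   vectors): widening a char operand to an int result takes one
   intermediate step,

     V16QI -> V8HI -> V4SI

   so *CODE1/*CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   *MULTI_STEP_CVT is 1, and *INTERM_TYPES holds the vector short
   type.  */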
6456
6457 bool
6458 supportable_widening_operation (enum tree_code code, gimple stmt,
6459 tree vectype_out, tree vectype_in,
6460 enum tree_code *code1, enum tree_code *code2,
6461 int *multi_step_cvt,
6462 vec<tree> *interm_types)
6463 {
6464 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6465 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6466 struct loop *vect_loop = NULL;
6467 enum machine_mode vec_mode;
6468 enum insn_code icode1, icode2;
6469 optab optab1, optab2;
6470 tree vectype = vectype_in;
6471 tree wide_vectype = vectype_out;
6472 enum tree_code c1, c2;
6473 int i;
6474 tree prev_type, intermediate_type;
6475 enum machine_mode intermediate_mode, prev_mode;
6476 optab optab3, optab4;
6477
6478 *multi_step_cvt = 0;
6479 if (loop_info)
6480 vect_loop = LOOP_VINFO_LOOP (loop_info);
6481
6482 switch (code)
6483 {
6484 case WIDEN_MULT_EXPR:
6485 /* The result of a vectorized widening operation usually requires
6486 two vectors (because the widened results do not fit into one vector).
6487 The generated vector results would normally be expected to be
6488 generated in the same order as in the original scalar computation,
6489 i.e. if 8 results are generated in each vector iteration, they are
6490 to be organized as follows:
6491 vect1: [res1,res2,res3,res4],
6492 vect2: [res5,res6,res7,res8].
6493
6494 However, in the special case that the result of the widening
6495 operation is used in a reduction computation only, the order doesn't
6496 matter (because when vectorizing a reduction we change the order of
6497 the computation). Some targets can take advantage of this and
6498 generate more efficient code. For example, targets like Altivec,
6499 that support widen_mult using a sequence of {mult_even,mult_odd}
6500 generate the following vectors:
6501 vect1: [res1,res3,res5,res7],
6502 vect2: [res2,res4,res6,res8].
6503
6504 When vectorizing outer-loops, we execute the inner-loop sequentially
6505 (each vectorized inner-loop iteration contributes to VF outer-loop
6506         iterations in parallel).  We therefore don't allow changing the
6507 order of the computation in the inner-loop during outer-loop
6508 vectorization. */
6509 /* TODO: Another case in which order doesn't *really* matter is when we
6510 widen and then contract again, e.g. (short)((int)x * y >> 8).
6511 Normally, pack_trunc performs an even/odd permute, whereas the
6512 repack from an even/odd expansion would be an interleave, which
6513 would be significantly simpler for e.g. AVX2. */
6514 /* In any case, in order to avoid duplicating the code below, recurse
6515 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6516 are properly set up for the caller. If we fail, we'll continue with
6517 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6518 if (vect_loop
6519 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6520 && !nested_in_vect_loop_p (vect_loop, stmt)
6521 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6522 stmt, vectype_out, vectype_in,
6523 code1, code2, multi_step_cvt,
6524 interm_types))
6525 return true;
6526 c1 = VEC_WIDEN_MULT_LO_EXPR;
6527 c2 = VEC_WIDEN_MULT_HI_EXPR;
6528 break;
6529
6530 case VEC_WIDEN_MULT_EVEN_EXPR:
6531 /* Support the recursion induced just above. */
6532 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6533 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6534 break;
6535
6536 case WIDEN_LSHIFT_EXPR:
6537 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6538 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6539 break;
6540
6541 CASE_CONVERT:
6542 c1 = VEC_UNPACK_LO_EXPR;
6543 c2 = VEC_UNPACK_HI_EXPR;
6544 break;
6545
6546 case FLOAT_EXPR:
6547 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6548 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6549 break;
6550
6551 case FIX_TRUNC_EXPR:
6552 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6553 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6554 computing the operation. */
6555 return false;
6556
6557 default:
6558 gcc_unreachable ();
6559 }
6560
6561 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6562 {
6563 enum tree_code ctmp = c1;
6564 c1 = c2;
6565 c2 = ctmp;
6566 }
6567
6568 if (code == FIX_TRUNC_EXPR)
6569 {
6570       /* The signedness is determined from the output operand.  */
6571 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6572 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6573 }
6574 else
6575 {
6576 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6577 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6578 }
6579
6580 if (!optab1 || !optab2)
6581 return false;
6582
6583 vec_mode = TYPE_MODE (vectype);
6584 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6585 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6586 return false;
6587
6588 *code1 = c1;
6589 *code2 = c2;
6590
6591 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6592 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6593 return true;
6594
6595 /* Check if it's a multi-step conversion that can be done using intermediate
6596 types. */
6597
6598 prev_type = vectype;
6599 prev_mode = vec_mode;
6600
6601 if (!CONVERT_EXPR_CODE_P (code))
6602 return false;
6603
6604   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6605      intermediate steps in the promotion sequence.  We try
6606      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6607      not.  */
6608 interm_types->create (MAX_INTERM_CVT_STEPS);
6609 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6610 {
6611 intermediate_mode = insn_data[icode1].operand[0].mode;
6612 intermediate_type
6613 = lang_hooks.types.type_for_mode (intermediate_mode,
6614 TYPE_UNSIGNED (prev_type));
6615 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6616 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6617
6618 if (!optab3 || !optab4
6619 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6620 || insn_data[icode1].operand[0].mode != intermediate_mode
6621 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6622 || insn_data[icode2].operand[0].mode != intermediate_mode
6623 || ((icode1 = optab_handler (optab3, intermediate_mode))
6624 == CODE_FOR_nothing)
6625 || ((icode2 = optab_handler (optab4, intermediate_mode))
6626 == CODE_FOR_nothing))
6627 break;
6628
6629 interm_types->quick_push (intermediate_type);
6630 (*multi_step_cvt)++;
6631
6632 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6633 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6634 return true;
6635
6636 prev_type = intermediate_type;
6637 prev_mode = intermediate_mode;
6638 }
6639
6640 interm_types->release ();
6641 return false;
6642 }
6643
6644
6645 /* Function supportable_narrowing_operation
6646
6647 Check whether an operation represented by the code CODE is a
6648 narrowing operation that is supported by the target platform in
6649 vector form (i.e., when operating on arguments of type VECTYPE_IN
6650 and producing a result of type VECTYPE_OUT).
6651
6652 Narrowing operations we currently support are NOP (CONVERT) and
6653 FIX_TRUNC. This function checks if these operations are supported by
6654 the target platform directly via vector tree-codes.
6655
6656 Output:
6657 - CODE1 is the code of a vector operation to be used when
6658 vectorizing the operation, if available.
6659 - MULTI_STEP_CVT determines the number of required intermediate steps in
6660 case of multi-step conversion (like int->short->char - in that case
6661 MULTI_STEP_CVT will be 1).
6662 - INTERM_TYPES contains the intermediate type required to perform the
6663 narrowing operation (short in the above example). */
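/* E.g. (an illustrative sketch, assuming 128-bit V4SI/V8HI/V16QI
   vectors): narrowing an int operand to a char result takes one
   intermediate step,

     V4SI -> V8HI -> V16QI

   so *CODE1 is VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT is 1, and
   *INTERM_TYPES holds the vector short type.  */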
6664
6665 bool
6666 supportable_narrowing_operation (enum tree_code code,
6667 tree vectype_out, tree vectype_in,
6668 enum tree_code *code1, int *multi_step_cvt,
6669 vec<tree> *interm_types)
6670 {
6671 enum machine_mode vec_mode;
6672 enum insn_code icode1;
6673 optab optab1, interm_optab;
6674 tree vectype = vectype_in;
6675 tree narrow_vectype = vectype_out;
6676 enum tree_code c1;
6677 tree intermediate_type;
6678 enum machine_mode intermediate_mode, prev_mode;
6679 int i;
6680 bool uns;
6681
6682 *multi_step_cvt = 0;
6683 switch (code)
6684 {
6685 CASE_CONVERT:
6686 c1 = VEC_PACK_TRUNC_EXPR;
6687 break;
6688
6689 case FIX_TRUNC_EXPR:
6690 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6691 break;
6692
6693 case FLOAT_EXPR:
6694 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6695 tree code and optabs used for computing the operation. */
6696 return false;
6697
6698 default:
6699 gcc_unreachable ();
6700 }
6701
6702 if (code == FIX_TRUNC_EXPR)
6703     /* The signedness is determined from the output operand.  */
6704 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6705 else
6706 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6707
6708 if (!optab1)
6709 return false;
6710
6711 vec_mode = TYPE_MODE (vectype);
6712 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6713 return false;
6714
6715 *code1 = c1;
6716
6717 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6718 return true;
6719
6720 /* Check if it's a multi-step conversion that can be done using intermediate
6721 types. */
6722 prev_mode = vec_mode;
6723 if (code == FIX_TRUNC_EXPR)
6724 uns = TYPE_UNSIGNED (vectype_out);
6725 else
6726 uns = TYPE_UNSIGNED (vectype);
6727
6728 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6729 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6730 costly than signed. */
6731 if (code == FIX_TRUNC_EXPR && uns)
6732 {
6733 enum insn_code icode2;
6734
6735 intermediate_type
6736 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6737 interm_optab
6738 = optab_for_tree_code (c1, intermediate_type, optab_default);
6739 if (interm_optab != unknown_optab
6740 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6741 && insn_data[icode1].operand[0].mode
6742 == insn_data[icode2].operand[0].mode)
6743 {
6744 uns = false;
6745 optab1 = interm_optab;
6746 icode1 = icode2;
6747 }
6748 }
6749
6750   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6751      intermediate steps in the demotion sequence.  We try
6752      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
6753 interm_types->create (MAX_INTERM_CVT_STEPS);
6754 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6755 {
6756 intermediate_mode = insn_data[icode1].operand[0].mode;
6757 intermediate_type
6758 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6759 interm_optab
6760 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6761 optab_default);
6762 if (!interm_optab
6763 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6764 || insn_data[icode1].operand[0].mode != intermediate_mode
6765 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6766 == CODE_FOR_nothing))
6767 break;
6768
6769 interm_types->quick_push (intermediate_type);
6770 (*multi_step_cvt)++;
6771
6772 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6773 return true;
6774
6775 prev_mode = intermediate_mode;
6776 optab1 = interm_optab;
6777 }
6778
6779 interm_types->release ();
6780 return false;
6781 }