[gcc.git] / gcc / tree-vect-transform.c
1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "timevar.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "params.h"
38 #include "recog.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
45 #include "toplev.h"
46 #include "real.h"
47
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *);
53 static tree vect_create_addr_base_for_vector_ref
54 (tree, tree *, tree, struct loop *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree, block_stmt_iterator *);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
63
 64 /* Utility functions dealing with loop peeling (not peeling itself).  */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static int vect_min_worthwhile_factor (enum tree_code);
73
74
75 static int
76 cost_for_stmt (tree stmt)
77 {
78 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
79
80 switch (STMT_VINFO_TYPE (stmt_info))
81 {
82 case load_vec_info_type:
83 return TARG_SCALAR_LOAD_COST;
84 case store_vec_info_type:
85 return TARG_SCALAR_STORE_COST;
86 case op_vec_info_type:
87 case condition_vec_info_type:
88 case assignment_vec_info_type:
89 case reduc_vec_info_type:
90 case induc_vec_info_type:
91 case type_promotion_vec_info_type:
92 case type_demotion_vec_info_type:
93 case type_conversion_vec_info_type:
94 case call_vec_info_type:
95 return TARG_SCALAR_STMT_COST;
96 case undef_vec_info_type:
97 default:
98 gcc_unreachable ();
99 }
100 }
101
102
103 /* Function vect_estimate_min_profitable_iters
104
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
107 loop.
108
109 TODO: Take profile info into account before making vectorization
110 decisions, if available. */
111
112 int
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
114 {
115 int i;
116 int min_profitable_iters;
117 int peel_iters_prologue;
118 int peel_iters_epilogue;
119 int vec_inside_cost = 0;
120 int vec_outside_cost = 0;
121 int scalar_single_iter_cost = 0;
122 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
125 int nbbs = loop->num_nodes;
126 int byte_misalign;
127 int peel_guard_costs = 0;
128 int innerloop_iters = 0, factor;
129 VEC (slp_instance, heap) *slp_instances;
130 slp_instance instance;
131
132 /* Cost model disabled. */
133 if (!flag_vect_cost_model)
134 {
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "cost model disabled.");
137 return 0;
138 }
139
140 /* Requires loop versioning tests to handle misalignment. */
141
142 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
143 {
144 /* FIXME: Make cost depend on complexity of individual check. */
145 vec_outside_cost +=
146 VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
147 if (vect_print_dump_info (REPORT_DETAILS))
148 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
149 "versioning to treat misalignment.\n");
150 }
151
152 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
153 {
154 /* FIXME: Make cost depend on complexity of individual check. */
155 vec_outside_cost +=
156 VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
159 "versioning aliasing.\n");
160 }
161
162 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
163 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
164 {
165 vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
166 }
167
168 /* Count statements in scalar loop. Using this as scalar cost for a single
169 iteration for now.
170
171 TODO: Add outer loop support.
172
173 TODO: Consider assigning different costs to different scalar
174 statements. */
175
176 /* FORNOW. */
177 if (loop->inner)
178 innerloop_iters = 50; /* FIXME */
179
180 for (i = 0; i < nbbs; i++)
181 {
182 block_stmt_iterator si;
183 basic_block bb = bbs[i];
184
185 if (bb->loop_father == loop->inner)
186 factor = innerloop_iters;
187 else
188 factor = 1;
189
190 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
191 {
192 tree stmt = bsi_stmt (si);
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 if (!STMT_VINFO_RELEVANT_P (stmt_info)
195 && !STMT_VINFO_LIVE_P (stmt_info))
196 continue;
197 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
198 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
199 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
200 some of the "outside" costs are generated inside the outer-loop. */
201 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
202 }
203 }
204
205 /* Add additional cost for the peeled instructions in prologue and epilogue
206 loop.
207
208    FORNOW: If we don't know the value of peel_iters for prologue or epilogue
209 at compile-time - we assume it's vf/2 (the worst would be vf-1).
210
211 TODO: Build an expression that represents peel_iters for prologue and
212 epilogue to be used in a run-time test. */
213
214 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
215
216 if (byte_misalign < 0)
217 {
218 peel_iters_prologue = vf/2;
219 if (vect_print_dump_info (REPORT_DETAILS))
220 fprintf (vect_dump, "cost model: "
221 "prologue peel iters set to vf/2.");
222
223 /* If peeling for alignment is unknown, loop bound of main loop becomes
224 unknown. */
225 peel_iters_epilogue = vf/2;
226 if (vect_print_dump_info (REPORT_DETAILS))
227 fprintf (vect_dump, "cost model: "
228 "epilogue peel iters set to vf/2 because "
229 "peeling for alignment is unknown .");
230
231 /* If peeled iterations are unknown, count a taken branch and a not taken
232 branch per peeled loop. Even if scalar loop iterations are known,
233 vector iterations are not known since peeled prologue iterations are
234 not known. Hence guards remain the same. */
235 peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
236 + TARG_COND_NOT_TAKEN_BRANCH_COST);
237
238 }
239 else
240 {
241 if (byte_misalign)
242 {
243 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
244 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
245 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
246 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
247
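          /* As an illustration (numbers chosen arbitrarily): for a 16-byte
             vector of 4-byte ints, element_size is 4 and nelements is 4, so
             a byte_misalign of 8 peels 4 - (8 / 4) = 2 prologue iterations
             to reach an aligned address.  */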
248 peel_iters_prologue = nelements - (byte_misalign / element_size);
249 }
250 else
251 peel_iters_prologue = 0;
252
253 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
254 {
255 peel_iters_epilogue = vf/2;
256 if (vect_print_dump_info (REPORT_DETAILS))
257 fprintf (vect_dump, "cost model: "
258 "epilogue peel iters set to vf/2 because "
259 "loop iterations are unknown .");
260
261           /* If peeled iterations are known but the number of scalar loop
262              iterations is unknown, count a taken branch per peeled loop.  */
263 peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
264
265 }
266 else
267 {
268 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
269 peel_iters_prologue = niters < peel_iters_prologue ?
270 niters : peel_iters_prologue;
271 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
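          /* For example (arbitrary numbers): with niters = 100, vf = 4 and a
             prologue of 2 iterations, the epilogue peels (100 - 2) % 4 = 2
             iterations.  */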
272 }
273 }
274
275 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
276 + (peel_iters_epilogue * scalar_single_iter_cost)
277 + peel_guard_costs;
278
279   /* Allow targets to add additional (outside-of-loop) costs.  FORNOW, the only
280 information we provide for the target is whether testing against the
281 threshold involves a runtime test. */
282 if (targetm.vectorize.builtin_vectorization_cost)
283 {
284 bool runtime_test = false;
285
286 /* If the number of iterations is unknown, or the
287          peeling-for-misalignment amount is unknown, we will have to generate
288 a runtime test to test the loop count against the threshold. */
289 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
290 || (byte_misalign < 0))
291 runtime_test = true;
292 vec_outside_cost +=
293 targetm.vectorize.builtin_vectorization_cost (runtime_test);
294 if (vect_print_dump_info (REPORT_DETAILS))
295 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
296 targetm.vectorize.builtin_vectorization_cost (runtime_test));
297 }
298
299 /* Add SLP costs. */
300 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
301 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
302 {
303 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
304 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
305 }
306
307 /* Calculate number of iterations required to make the vector version
308 profitable, relative to the loop bodies only. The following condition
309 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
310 SIC = scalar iteration cost, VIC = vector iteration cost,
311 VOC = vector outside cost and VF = vectorization factor. */
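   /* As a worked example of the condition above (costs chosen arbitrarily):
      with SIC = 4, VIC = 6, VOC = 20 and VF = 4 we need
      (4*4 - 6) * niters > 20 * 4, i.e. the vectorized loop only pays off
      for niters > 8.  */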
312
313 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
314 {
315 if (vec_outside_cost <= 0)
316 min_profitable_iters = 1;
317 else
318 {
319 min_profitable_iters = (vec_outside_cost * vf
320 - vec_inside_cost * peel_iters_prologue
321 - vec_inside_cost * peel_iters_epilogue)
322 / ((scalar_single_iter_cost * vf)
323 - vec_inside_cost);
324
325 if ((scalar_single_iter_cost * vf * min_profitable_iters)
326 <= ((vec_inside_cost * min_profitable_iters)
327 + (vec_outside_cost * vf)))
328 min_profitable_iters++;
329 }
330 }
331   /* The vector version will never be profitable.  */
332 else
333 {
334 if (vect_print_dump_info (REPORT_DETAILS))
335 fprintf (vect_dump, "cost model: vector iteration cost = %d "
336 "is divisible by scalar iteration cost = %d by a factor "
337 "greater than or equal to the vectorization factor = %d .",
338 vec_inside_cost, scalar_single_iter_cost, vf);
339 return -1;
340 }
341
342 if (vect_print_dump_info (REPORT_DETAILS))
343 {
344 fprintf (vect_dump, "Cost model analysis: \n");
345 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
346 vec_inside_cost);
347 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
348 vec_outside_cost);
349 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
350 fprintf (vect_dump, " prologue iterations: %d\n",
351 peel_iters_prologue);
352 fprintf (vect_dump, " epilogue iterations: %d\n",
353 peel_iters_epilogue);
354 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
355 min_profitable_iters);
356 }
357
358 min_profitable_iters =
359 min_profitable_iters < vf ? vf : min_profitable_iters;
360
361 /* Because the condition we create is:
362 if (niters <= min_profitable_iters)
363 then skip the vectorized loop. */
364 min_profitable_iters--;
365
366 if (vect_print_dump_info (REPORT_DETAILS))
367 fprintf (vect_dump, " Profitability threshold = %d\n",
368 min_profitable_iters);
369
370 return min_profitable_iters;
371 }
372
373
374 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
375 functions. Design better to avoid maintenance issues. */
376
377 /* Function vect_model_reduction_cost.
378
379 Models cost for a reduction operation, including the vector ops
380 generated within the strip-mine loop, the initial definition before
381 the loop, and the epilogue code that must be generated. */
382
383 static void
384 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
385 int ncopies)
386 {
387 int outer_cost = 0;
388 enum tree_code code;
389 optab optab;
390 tree vectype;
391 tree orig_stmt;
392 tree reduction_op;
393 enum machine_mode mode;
394 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
395 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
396 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
397 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
398
399 /* Cost of reduction op inside loop. */
400 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
401
402 reduction_op = TREE_OPERAND (operation, op_type-1);
403 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
404 mode = TYPE_MODE (vectype);
405 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
406
407 if (!orig_stmt)
408 orig_stmt = STMT_VINFO_STMT (stmt_info);
409
410 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
411
412 /* Add in cost for initial definition. */
413 outer_cost += TARG_SCALAR_TO_VEC_COST;
414
415 /* Determine cost of epilogue code.
416
417 We have a reduction operator that will reduce the vector in one statement.
418 Also requires scalar extract. */
419
420 if (!nested_in_vect_loop_p (loop, orig_stmt))
421 {
422 if (reduc_code < NUM_TREE_CODES)
423 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
424 else
425 {
426 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
427 tree bitsize =
428             TYPE_SIZE (TREE_TYPE (GIMPLE_STMT_OPERAND (orig_stmt, 0)));
429 int element_bitsize = tree_low_cst (bitsize, 1);
430 int nelements = vec_size_in_bits / element_bitsize;
431
432 optab = optab_for_tree_code (code, vectype);
433
434 /* We have a whole vector shift available. */
435 if (VECTOR_MODE_P (mode)
436 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
437 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
438 /* Final reduction via vector shifts and the reduction operator. Also
439 requires scalar extract. */
440 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
441 + TARG_VEC_TO_SCALAR_COST);
442 else
443 /* Use extracts and reduction op for final reduction. For N elements,
444 we have N extracts and N-1 reduction ops. */
445 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
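            /* E.g. for nelements == 4 (and arbitrary unit costs), the
               shift-based epilogue above costs log2(4) * 2 = 4 vector stmts
               plus one vector-to-scalar extract, while the extract-based
               fallback costs 4 + 3 = 7 vector stmts.  */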
446 }
447 }
448
449 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
450
451 if (vect_print_dump_info (REPORT_DETAILS))
452 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
453 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
454 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
455 }
456
457
458 /* Function vect_model_induction_cost.
459
460 Models cost for induction operations. */
461
462 static void
463 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
464 {
465 /* loop cost for vec_loop. */
466 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
467 /* prologue cost for vec_init and vec_step. */
468 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
469
470 if (vect_print_dump_info (REPORT_DETAILS))
471 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
472 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
473 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
474 }
475
476
477 /* Function vect_model_simple_cost.
478
479 Models cost for simple operations, i.e. those that only emit ncopies of a
480 single op. Right now, this does not account for multiple insns that could
481 be generated for the single vector op. We will handle that shortly. */
482
483 void
484 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
485 enum vect_def_type *dt, slp_tree slp_node)
486 {
487 int i;
488 int inside_cost = 0, outside_cost = 0;
489
490 inside_cost = ncopies * TARG_VEC_STMT_COST;
491
492   /* FORNOW: Assuming maximum 2 args per stmt.  */
493 for (i = 0; i < 2; i++)
494 {
495 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
496 outside_cost += TARG_SCALAR_TO_VEC_COST;
497 }
498
499 if (vect_print_dump_info (REPORT_DETAILS))
500 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
501 "outside_cost = %d .", inside_cost, outside_cost);
502
503   /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
504 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
505 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
506 }
507
508
509 /* Function vect_cost_strided_group_size
510
511 For strided load or store, return the group_size only if it is the first
512 load or store of a group, else return 1. This ensures that group size is
513 only returned once per group. */
514
515 static int
516 vect_cost_strided_group_size (stmt_vec_info stmt_info)
517 {
518 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
519
520 if (first_stmt == STMT_VINFO_STMT (stmt_info))
521 return DR_GROUP_SIZE (stmt_info);
522
523 return 1;
524 }
525
526
527 /* Function vect_model_store_cost
528
529 Models cost for stores. In the case of strided accesses, one access
530 has the overhead of the strided access attributed to it. */
531
532 void
533 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
534 enum vect_def_type dt, slp_tree slp_node)
535 {
536 int group_size;
537 int inside_cost = 0, outside_cost = 0;
538
539 if (dt == vect_constant_def || dt == vect_invariant_def)
540 outside_cost = TARG_SCALAR_TO_VEC_COST;
541
542 /* Strided access? */
543 if (DR_GROUP_FIRST_DR (stmt_info))
544 group_size = vect_cost_strided_group_size (stmt_info);
545 /* Not a strided access. */
546 else
547 group_size = 1;
548
549 /* Is this an access in a group of stores, which provide strided access?
550 If so, add in the cost of the permutes. */
551 if (group_size > 1)
552 {
553 /* Uses a high and low interleave operation for each needed permute. */
554 inside_cost = ncopies * exact_log2(group_size) * group_size
555 * TARG_VEC_STMT_COST;
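      /* For example, a single copy (ncopies == 1) of a group of 4
         interleaved stores is charged log2(4) * 4 = 8 interleave-high/low
         stmts here.  */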
556
557 if (vect_print_dump_info (REPORT_DETAILS))
558 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
559 group_size);
560
561 }
562
563 /* Costs of the stores. */
564 inside_cost += ncopies * TARG_VEC_STORE_COST;
565
566 if (vect_print_dump_info (REPORT_DETAILS))
567 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
568 "outside_cost = %d .", inside_cost, outside_cost);
569
570   /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
571 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
572 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
573 }
574
575
576 /* Function vect_model_load_cost
577
578 Models cost for loads. In the case of strided accesses, the last access
579 has the overhead of the strided access attributed to it. Since unaligned
580 accesses are supported for loads, we also account for the costs of the
581 access scheme chosen. */
582
583 void
584 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
585
586 {
587 int group_size;
588   int alignment_support_scheme;
589 tree first_stmt;
590 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
591 int inside_cost = 0, outside_cost = 0;
592
593 /* Strided accesses? */
594 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
595 if (first_stmt && !slp_node)
596 {
597 group_size = vect_cost_strided_group_size (stmt_info);
598 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
599 }
600 /* Not a strided access. */
601 else
602 {
603 group_size = 1;
604 first_dr = dr;
605 }
606
607   alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
608
609 /* Is this an access in a group of loads providing strided access?
610 If so, add in the cost of the permutes. */
611 if (group_size > 1)
612 {
613       /* Uses even and odd extract operations for each needed permute.  */
614 inside_cost = ncopies * exact_log2(group_size) * group_size
615 * TARG_VEC_STMT_COST;
616
617 if (vect_print_dump_info (REPORT_DETAILS))
618 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
619 group_size);
620
621 }
622
623 /* The loads themselves. */
624   switch (alignment_support_scheme)
625 {
626 case dr_aligned:
627 {
628 inside_cost += ncopies * TARG_VEC_LOAD_COST;
629
630 if (vect_print_dump_info (REPORT_DETAILS))
631 fprintf (vect_dump, "vect_model_load_cost: aligned.");
632
633 break;
634 }
635 case dr_unaligned_supported:
636 {
637 /* Here, we assign an additional cost for the unaligned load. */
638 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
639
640 if (vect_print_dump_info (REPORT_DETAILS))
641 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
642 "hardware.");
643
644 break;
645 }
646 case dr_explicit_realign:
647 {
648 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
649
650 /* FIXME: If the misalignment remains fixed across the iterations of
651 the containing loop, the following cost should be added to the
652 outside costs. */
653 if (targetm.vectorize.builtin_mask_for_load)
654 inside_cost += TARG_VEC_STMT_COST;
655
656 break;
657 }
658 case dr_explicit_realign_optimized:
659 {
660 if (vect_print_dump_info (REPORT_DETAILS))
661 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
662 "pipelined.");
663
664 /* Unaligned software pipeline has a load of an address, an initial
665 load, and possibly a mask operation to "prime" the loop. However,
666 if this is an access in a group of loads, which provide strided
667 access, then the above cost should only be considered for one
668 access in the group. Inside the loop, there is a load op
669 and a realignment op. */
670
671 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
672 {
673 outside_cost = 2*TARG_VEC_STMT_COST;
674 if (targetm.vectorize.builtin_mask_for_load)
675 outside_cost += TARG_VEC_STMT_COST;
676 }
677
678 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
679
680 break;
681 }
682
683 default:
684 gcc_unreachable ();
685 }
686
687 if (vect_print_dump_info (REPORT_DETAILS))
688 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
689 "outside_cost = %d .", inside_cost, outside_cost);
690
691   /* Set the costs either in STMT_INFO or SLP_NODE (if it exists).  */
692 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
693 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
694 }
695
696
697 /* Function vect_get_new_vect_var.
698
699    Returns a name for a new variable.  The current naming scheme prepends
700    the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
701    VAR_KIND) to NAME, if provided, to form the name of a vectorizer
702    generated variable; otherwise the bare prefix is used.  */
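/* For example (hypothetical NAME): requesting a vect_pointer_var for a base
   named "x" yields a temporary named after "vect_px", while a NULL NAME
   falls back to the bare prefix.  */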
703
704 static tree
705 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
706 {
707 const char *prefix;
708 tree new_vect_var;
709
710 switch (var_kind)
711 {
712 case vect_simple_var:
713 prefix = "vect_";
714 break;
715 case vect_scalar_var:
716 prefix = "stmp_";
717 break;
718 case vect_pointer_var:
719 prefix = "vect_p";
720 break;
721 default:
722 gcc_unreachable ();
723 }
724
725 if (name)
726 {
727 char* tmp = concat (prefix, name, NULL);
728 new_vect_var = create_tmp_var (type, tmp);
729 free (tmp);
730 }
731 else
732 new_vect_var = create_tmp_var (type, prefix);
733
734 /* Mark vector typed variable as a gimple register variable. */
735 if (TREE_CODE (type) == VECTOR_TYPE)
736 DECL_GIMPLE_REG_P (new_vect_var) = true;
737
738 return new_vect_var;
739 }
740
741
742 /* Function vect_create_addr_base_for_vector_ref.
743
744 Create an expression that computes the address of the first memory location
745 that will be accessed for a data reference.
746
747 Input:
748 STMT: The statement containing the data reference.
749 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
750    OFFSET: Optional.  If supplied, it will be added to the initial address.
751    LOOP: Specify the loop-nest relative to which the address should be computed.
752 For example, when the dataref is in an inner-loop nested in an
753 outer-loop that is now being vectorized, LOOP can be either the
754 outer-loop, or the inner-loop. The first memory location accessed
755 by the following dataref ('in' points to short):
756
757 for (i=0; i<N; i++)
758 for (j=0; j<M; j++)
759 s += in[i+j]
760
761 is as follows:
762 if LOOP=i_loop: &in (relative to i_loop)
763 if LOOP=j_loop: &in+i*2B (relative to j_loop)
764
765 Output:
766 1. Return an SSA_NAME whose value is the address of the memory location of
767 the first vector of the data reference.
768 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
769 these statement(s) which define the returned SSA_NAME.
770
771 FORNOW: We are only handling array accesses with step 1. */
772
773 static tree
774 vect_create_addr_base_for_vector_ref (tree stmt,
775 tree *new_stmt_list,
776 tree offset,
777 struct loop *loop)
778 {
779 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
780 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
781 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
782 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
783 tree base_name;
784 tree data_ref_base_var;
785 tree new_base_stmt;
786 tree vec_stmt;
787 tree addr_base, addr_expr;
788 tree dest, new_stmt;
789 tree base_offset = unshare_expr (DR_OFFSET (dr));
790 tree init = unshare_expr (DR_INIT (dr));
791 tree vect_ptr_type, addr_expr2;
792 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
793
794 gcc_assert (loop);
795 if (loop != containing_loop)
796 {
797 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
798 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
799
800 gcc_assert (nested_in_vect_loop_p (loop, stmt));
801
802 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
803 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
804 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
805 }
806
807 /* Create data_ref_base */
808 base_name = build_fold_indirect_ref (data_ref_base);
809 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
810 add_referenced_var (data_ref_base_var);
811 data_ref_base = force_gimple_operand (data_ref_base, &new_base_stmt,
812 true, data_ref_base_var);
813   append_to_statement_list_force (new_base_stmt, new_stmt_list);
814
815 /* Create base_offset */
816 base_offset = size_binop (PLUS_EXPR, base_offset, init);
817 base_offset = fold_convert (sizetype, base_offset);
818 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
819 add_referenced_var (dest);
820 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
821 append_to_statement_list_force (new_stmt, new_stmt_list);
822
823 if (offset)
824 {
825 tree tmp = create_tmp_var (sizetype, "offset");
826
827 add_referenced_var (tmp);
828 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
829 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
830 base_offset, offset);
831 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
832 append_to_statement_list_force (new_stmt, new_stmt_list);
833 }
834
835 /* base + base_offset */
836 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
837 data_ref_base, base_offset);
838
839 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
840
841 /* addr_expr = addr_base */
842 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
843 get_name (base_name));
844 add_referenced_var (addr_expr);
845 vec_stmt = fold_convert (vect_ptr_type, addr_base);
846 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
847 get_name (base_name));
848 add_referenced_var (addr_expr2);
849 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
850 append_to_statement_list_force (new_stmt, new_stmt_list);
851
852 if (vect_print_dump_info (REPORT_DETAILS))
853 {
854 fprintf (vect_dump, "created ");
855 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
856 }
857 return vec_stmt;
858 }
859
860
861 /* Function vect_create_data_ref_ptr.
862
863 Create a new pointer to vector type (vp), that points to the first location
864 accessed in the loop by STMT, along with the def-use update chain to
865 appropriately advance the pointer through the loop iterations. Also set
866 aliasing information for the pointer. This vector pointer is used by the
867 callers to this function to create a memory reference expression for vector
868 load/store access.
869
870 Input:
871 1. STMT: a stmt that references memory. Expected to be of the form
872 GIMPLE_MODIFY_STMT <name, data-ref> or
873 GIMPLE_MODIFY_STMT <data-ref, name>.
874 2. AT_LOOP: the loop where the vector memref is to be created.
875 3. OFFSET (optional): an offset to be added to the initial address accessed
876 by the data-ref in STMT.
877 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
878 pointing to the initial address.
879 5. TYPE: if not NULL indicates the required type of the data-ref
880
881 Output:
882 1. Declare a new ptr to vector_type, and have it point to the base of the
883       data reference (initial address accessed by the data reference).
884 For example, for vector of type V8HI, the following code is generated:
885
886 v8hi *vp;
887 vp = (v8hi *)initial_address;
888
889 if OFFSET is not supplied:
890 initial_address = &a[init];
891 if OFFSET is supplied:
892 initial_address = &a[init + OFFSET];
893
894 Return the initial_address in INITIAL_ADDRESS.
895
896 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
897 update the pointer in each iteration of the loop.
898
899 Return the increment stmt that updates the pointer in PTR_INCR.
900
901 3. Set INV_P to true if the access pattern of the data reference in the
902 vectorized loop is invariant. Set it to false otherwise.
903
904 4. Return the pointer. */
905
906 static tree
907 vect_create_data_ref_ptr (tree stmt, struct loop *at_loop,
908 tree offset, tree *initial_address, tree *ptr_incr,
909 bool only_init, tree type, bool *inv_p)
910 {
911 tree base_name;
912 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
913 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
914 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
915 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
916 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
917 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
918 tree vect_ptr_type;
919 tree vect_ptr;
920 tree tag;
921 tree new_temp;
922 tree vec_stmt;
923 tree new_stmt_list = NULL_TREE;
924 edge pe;
925 basic_block new_bb;
926 tree vect_ptr_init;
927 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
928 tree vptr;
929 block_stmt_iterator incr_bsi;
930 bool insert_after;
931 tree indx_before_incr, indx_after_incr;
932 tree incr;
933 tree step;
934
935 /* Check the step (evolution) of the load in LOOP, and record
936 whether it's invariant. */
937 if (nested_in_vect_loop)
938 step = STMT_VINFO_DR_STEP (stmt_info);
939 else
940 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
941
942 if (tree_int_cst_compare (step, size_zero_node) == 0)
943 *inv_p = true;
944 else
945 *inv_p = false;
946
947 /* Create an expression for the first address accessed by this load
948 in LOOP. */
949 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
950
951 if (vect_print_dump_info (REPORT_DETAILS))
952 {
953 tree data_ref_base = base_name;
954 fprintf (vect_dump, "create vector-pointer variable to type: ");
955 print_generic_expr (vect_dump, vectype, TDF_SLIM);
956 if (TREE_CODE (data_ref_base) == VAR_DECL)
957 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
958 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
959 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
960 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
961 fprintf (vect_dump, " vectorizing a record based array ref: ");
962 else if (TREE_CODE (data_ref_base) == SSA_NAME)
963 fprintf (vect_dump, " vectorizing a pointer ref: ");
964 print_generic_expr (vect_dump, base_name, TDF_SLIM);
965 }
966
967 /** (1) Create the new vector-pointer variable: **/
968 if (type)
969 vect_ptr_type = build_pointer_type (type);
970 else
971 vect_ptr_type = build_pointer_type (vectype);
972 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
973 get_name (base_name));
974 add_referenced_var (vect_ptr);
975
976 /** (2) Add aliasing information to the new vector-pointer:
977 (The points-to info (DR_PTR_INFO) may be defined later.) **/
978
979 tag = DR_SYMBOL_TAG (dr);
980 gcc_assert (tag);
981
982   /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
983 tag must be created with tag added to its may alias list. */
984 if (!MTAG_P (tag))
985 new_type_alias (vect_ptr, tag, DR_REF (dr));
986 else
987 set_symbol_mem_tag (vect_ptr, tag);
988
989 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
990
991 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
992 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
993 def-use update cycles for the pointer: One relative to the outer-loop
994 (LOOP), which is what steps (3) and (4) below do. The other is relative
995 to the inner-loop (which is the inner-most loop containing the dataref),
996       and this is done by step (5) below.
997
998 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
999 inner-most loop, and so steps (3),(4) work the same, and step (5) is
1000 redundant. Steps (3),(4) create the following:
1001
1002 vp0 = &base_addr;
1003 LOOP: vp1 = phi(vp0,vp2)
1004 ...
1005 ...
1006 vp2 = vp1 + step
1007 goto LOOP
1008
1009 If there is an inner-loop nested in loop, then step (5) will also be
1010 applied, and an additional update in the inner-loop will be created:
1011
1012 vp0 = &base_addr;
1013 LOOP: vp1 = phi(vp0,vp2)
1014 ...
1015 inner: vp3 = phi(vp1,vp4)
1016 vp4 = vp3 + inner_step
1017 if () goto inner
1018 ...
1019 vp2 = vp1 + step
1020 if () goto LOOP */
1021
1022   /** (3) Calculate the initial address of the vector-pointer, and set
1023 the vector-pointer to point to it before the loop: **/
1024
1025   /* Create: &(base[init_val+offset]) in the loop preheader.  */
1026
1027 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1028 offset, loop);
1029 pe = loop_preheader_edge (loop);
1030 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1031 gcc_assert (!new_bb);
1032 *initial_address = new_temp;
1033
1034 /* Create: p = (vectype *) initial_base */
1035 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1036 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
1037 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1038 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
1039 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1040 gcc_assert (!new_bb);
1041
1042
1043 /** (4) Handle the updating of the vector-pointer inside the loop.
1044 This is needed when ONLY_INIT is false, and also when AT_LOOP
1045 is the inner-loop nested in LOOP (during outer-loop vectorization).
1046 **/
1047
1048 if (only_init && at_loop == loop) /* No update in loop is required. */
1049 {
1050 /* Copy the points-to information if it exists. */
1051 if (DR_PTR_INFO (dr))
1052 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1053 vptr = vect_ptr_init;
1054 }
1055 else
1056 {
1057 /* The step of the vector pointer is the Vector Size. */
1058 tree step = TYPE_SIZE_UNIT (vectype);
1059 /* One exception to the above is when the scalar step of the load in
1060 LOOP is zero. In this case the step here is also zero. */
1061 if (*inv_p)
1062 step = size_zero_node;
1063
1064 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1065
1066 create_iv (vect_ptr_init,
1067 fold_convert (vect_ptr_type, step),
1068 NULL_TREE, loop, &incr_bsi, insert_after,
1069 &indx_before_incr, &indx_after_incr);
1070 incr = bsi_stmt (incr_bsi);
1071 set_stmt_info (stmt_ann (incr),
1072 new_stmt_vec_info (incr, loop_vinfo));
1073
1074 /* Copy the points-to information if it exists. */
1075 if (DR_PTR_INFO (dr))
1076 {
1077 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1078 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1079 }
1080 merge_alias_info (vect_ptr_init, indx_before_incr);
1081 merge_alias_info (vect_ptr_init, indx_after_incr);
1082 if (ptr_incr)
1083 *ptr_incr = incr;
1084
1085 vptr = indx_before_incr;
1086 }
1087
1088 if (!nested_in_vect_loop || only_init)
1089 return vptr;
1090
1091
1092 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1093       nested in LOOP, if it exists: **/
1094
1095 gcc_assert (nested_in_vect_loop);
1096 if (!only_init)
1097 {
1098 standard_iv_increment_position (containing_loop, &incr_bsi,
1099 &insert_after);
1100 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1101 containing_loop, &incr_bsi, insert_after, &indx_before_incr,
1102 &indx_after_incr);
1103 incr = bsi_stmt (incr_bsi);
1104 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1105
1106 /* Copy the points-to information if it exists. */
1107 if (DR_PTR_INFO (dr))
1108 {
1109 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1110 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1111 }
1112 merge_alias_info (vect_ptr_init, indx_before_incr);
1113 merge_alias_info (vect_ptr_init, indx_after_incr);
1114 if (ptr_incr)
1115 *ptr_incr = incr;
1116
1117 return indx_before_incr;
1118 }
1119 else
1120 gcc_unreachable ();
1121 }
1122
1123
1124 /* Function bump_vector_ptr
1125
1126 Increment a pointer (to a vector type) by vector-size. If requested,
1127    i.e. if PTR_INCR is given, then also connect the new increment stmt
1128 to the existing def-use update-chain of the pointer, by modifying
1129 the PTR_INCR as illustrated below:
1130
1131 The pointer def-use update-chain before this function:
1132 DATAREF_PTR = phi (p_0, p_2)
1133 ....
1134 PTR_INCR: p_2 = DATAREF_PTR + step
1135
1136 The pointer def-use update-chain after this function:
1137 DATAREF_PTR = phi (p_0, p_2)
1138 ....
1139 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1140 ....
1141 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1142
1143 Input:
1144 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1145 in the loop.
1146 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1147 the loop. The increment amount across iterations is expected
1148 to be vector_size.
1149 BSI - location where the new update stmt is to be placed.
1150 STMT - the original scalar memory-access stmt that is being vectorized.
1151 BUMP - optional. The offset by which to bump the pointer. If not given,
1152 the offset is assumed to be vector_size.
1153
1154 Output: Return NEW_DATAREF_PTR as illustrated above.
1155
1156 */
1157
1158 static tree
1159 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
1160 tree stmt, tree bump)
1161 {
1162 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1163 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1164 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1165 tree vptr_type = TREE_TYPE (dataref_ptr);
1166 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1167 tree update = TYPE_SIZE_UNIT (vectype);
1168 tree incr_stmt;
1169 ssa_op_iter iter;
1170 use_operand_p use_p;
1171 tree new_dataref_ptr;
1172
1173 if (bump)
1174 update = bump;
1175
1176 incr_stmt = build_gimple_modify_stmt (ptr_var,
1177 build2 (POINTER_PLUS_EXPR, vptr_type,
1178 dataref_ptr, update));
1179 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1180 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1181 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1182
1183 /* Copy the points-to information if it exists. */
1184 if (DR_PTR_INFO (dr))
1185 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1186 merge_alias_info (new_dataref_ptr, dataref_ptr);
1187
1188 if (!ptr_incr)
1189 return new_dataref_ptr;
1190
1191 /* Update the vector-pointer's cross-iteration increment. */
1192 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1193 {
1194 tree use = USE_FROM_PTR (use_p);
1195
1196 if (use == dataref_ptr)
1197 SET_USE (use_p, new_dataref_ptr);
1198 else
1199 gcc_assert (tree_int_cst_compare (use, update) == 0);
1200 }
1201
1202 return new_dataref_ptr;
1203 }
1204
1205
1206 /* Function vect_create_destination_var.
1207
1208 Create a new temporary of type VECTYPE. */
1209
1210 static tree
1211 vect_create_destination_var (tree scalar_dest, tree vectype)
1212 {
1213 tree vec_dest;
1214 const char *new_name;
1215 tree type;
1216 enum vect_var_kind kind;
1217
1218 kind = vectype ? vect_simple_var : vect_scalar_var;
1219 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1220
1221 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1222
1223 new_name = get_name (scalar_dest);
1224 if (!new_name)
1225 new_name = "var_";
1226 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1227 add_referenced_var (vec_dest);
1228
1229 return vec_dest;
1230 }
1231
1232
1233 /* Function vect_init_vector.
1234
1235 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1236 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1237 is not NULL. Otherwise, place the initialization at the loop preheader.
1238 Return the DEF of INIT_STMT.
1239 It will be used in the vectorization of STMT. */
1240
1241 static tree
1242 vect_init_vector (tree stmt, tree vector_var, tree vector_type,
1243 block_stmt_iterator *bsi)
1244 {
1245 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1246 tree new_var;
1247 tree init_stmt;
1248 tree vec_oprnd;
1249 edge pe;
1250 tree new_temp;
1251 basic_block new_bb;
1252
1253 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1254 add_referenced_var (new_var);
1255 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1256 new_temp = make_ssa_name (new_var, init_stmt);
1257 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1258
1259 if (bsi)
1260 vect_finish_stmt_generation (stmt, init_stmt, bsi);
1261 else
1262 {
1263 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1264 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1265
1266 if (nested_in_vect_loop_p (loop, stmt))
1267 loop = loop->inner;
1268 pe = loop_preheader_edge (loop);
1269 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1270 gcc_assert (!new_bb);
1271 }
1272
1273 if (vect_print_dump_info (REPORT_DETAILS))
1274 {
1275 fprintf (vect_dump, "created new init_stmt: ");
1276 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1277 }
1278
1279 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1280 return vec_oprnd;
1281 }
1282
1283
1284 /* For constant and loop invariant defs of SLP_NODE this function returns
1285 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1286 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1287 stmts. */
1288
1289 static void
1290 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1291 unsigned int op_num)
1292 {
1293 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1294 tree stmt = VEC_index (tree, stmts, 0);
1295 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1296 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1297 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1298 tree vec_cst;
1299 tree t = NULL_TREE;
1300 int j, number_of_places_left_in_vector;
1301 tree vector_type;
1302 tree op, vop, operation;
1303 int group_size = VEC_length (tree, stmts);
1304 unsigned int vec_num, i;
1305 int number_of_copies = 1;
1306 bool is_store = false;
1307 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1308 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1309
1310 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1311 is_store = true;
1312
1313 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1314 created vectors. It is greater than 1 if unrolling is performed.
1315
1316 For example, we have two scalar operands, s1 and s2 (e.g., group of
1317    strided accesses of size two), while NUNITS is four (i.e., four scalars
1318 of this type can be packed in a vector). The output vector will contain
1319 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1320 will be 2).
1321
1322 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1323 containing the operands.
1324
1325    For example, NUNITS is four as before, and the group size is 8
1326 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1327 {s5, s6, s7, s8}. */
1328
1329 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
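  /* E.g. (arbitrary sizes) with nunits == 4 and group_size == 6,
     least_common_multiple is 12, so each scalar operand is used
     12 / 6 = 2 times and the loop below fills 12 / 4 = 3 vectors.  */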
1330
1331 number_of_places_left_in_vector = nunits;
1332 for (j = 0; j < number_of_copies; j++)
1333 {
1334 for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
1335 {
1336 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1337 if (is_store)
1338 op = operation;
1339 else
1340 op = TREE_OPERAND (operation, op_num);
1341
1342 /* Create 'vect_ = {op0,op1,...,opn}'. */
1343 t = tree_cons (NULL_TREE, op, t);
1344
1345 number_of_places_left_in_vector--;
1346
1347 if (number_of_places_left_in_vector == 0)
1348 {
1349 number_of_places_left_in_vector = nunits;
1350
1351 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1352 vec_cst = build_constructor_from_list (vector_type, t);
1353 VEC_quick_push (tree, voprnds,
1354 vect_init_vector (stmt, vec_cst, vector_type,
1355 NULL));
1356 t = NULL_TREE;
1357 }
1358 }
1359 }
1360
1361   /* Since the vectors were created in reverse order, put them back in
1362      the correct order.  */
1363 vec_num = VEC_length (tree, voprnds);
1364 for (j = vec_num - 1; j >= 0; j--)
1365 {
1366 vop = VEC_index (tree, voprnds, j);
1367 VEC_quick_push (tree, *vec_oprnds, vop);
1368 }
1369
1370 VEC_free (tree, heap, voprnds);
1371
1372 /* In case that VF is greater than the unrolling factor needed for the SLP
1373 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1374 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1375 to replicate the vectors. */
1376 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1377 {
1378 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1379 VEC_quick_push (tree, *vec_oprnds, vop);
1380 }
1381 }
1382
1383
1384 /* Get vectorized definitions from SLP_NODE that contains corresponding
1385 vectorized def-stmts. */
1386
1387 static void
1388 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1389 {
1390 tree vec_oprnd;
1391 tree vec_def_stmt;
1392 unsigned int i;
1393
1394 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1395
1396 for (i = 0;
1397 VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1398 i++)
1399 {
1400 gcc_assert (vec_def_stmt);
1401 vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0);
1402 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1403 }
1404 }
1405
1406
1407 /* Get vectorized definitions for SLP_NODE.
1408 If the scalar definitions are loop invariants or constants, collect them and
1409 call vect_get_constant_vectors() to create vector stmts.
1410 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1411 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1412 vect_get_slp_vect_defs() to retrieve them.
1413 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1414    the right node).  This is used when the second operand must remain scalar.  */
1415
1416 static void
1417 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1418 VEC (tree,heap) **vec_oprnds1)
1419 {
1420 tree operation, first_stmt;
1421
1422 /* Allocate memory for vectorized defs. */
1423 *vec_oprnds0 = VEC_alloc (tree, heap,
1424 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1425
1426 /* SLP_NODE corresponds either to a group of stores or to a group of
1427 unary/binary operations. We don't call this function for loads. */
1428 if (SLP_TREE_LEFT (slp_node))
1429 /* The defs are already vectorized. */
1430 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1431 else
1432 /* Build vectors from scalar defs. */
1433 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
1434
1435 first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1436 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1437 /* Since we don't call this function with loads, this is a group of
1438 stores. */
1439 return;
1440
1441 operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
1442 if (TREE_OPERAND_LENGTH (operation) == unary_op || !vec_oprnds1)
1443 return;
1444
1445 *vec_oprnds1 = VEC_alloc (tree, heap,
1446 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1447
1448 if (SLP_TREE_RIGHT (slp_node))
1449 /* The defs are already vectorized. */
1450 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1451 else
1452 /* Build vectors from scalar defs. */
1453 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1454 }
1455
1456
1457 /* Function get_initial_def_for_induction
1458
1459 Input:
1460 STMT - a stmt that performs an induction operation in the loop.
1461    IV_PHI - the phi node of the induction variable
1462
1463 Output:
1464 Return a vector variable, initialized with the first VF values of
1465 the induction variable. E.g., for an iv with IV_PHI='X' and
1466 evolution S, for a vector of 4 units, we want to return:
1467 [X, X + S, X + 2*S, X + 3*S]. */
1468
1469 static tree
1470 get_initial_def_for_induction (tree iv_phi)
1471 {
1472 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1473 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1474 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1475 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1476 tree vectype = get_vectype_for_scalar_type (scalar_type);
1477 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1478 edge pe = loop_preheader_edge (loop);
1479 struct loop *iv_loop;
1480 basic_block new_bb;
1481 tree vec, vec_init, vec_step, t;
1482 tree access_fn;
1483 tree new_var;
1484 tree new_name;
1485 tree init_stmt;
1486 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1487 tree init_expr, step_expr;
1488 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1489 int i;
1490 bool ok;
1491 int ncopies = vf / nunits;
1492 tree expr;
1493 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1494 bool nested_in_vect_loop = false;
1495 tree stmts;
1496 imm_use_iterator imm_iter;
1497 use_operand_p use_p;
1498 tree exit_phi;
1499 edge latch_e;
1500 tree loop_arg;
1501 block_stmt_iterator si;
1502 basic_block bb = bb_for_stmt (iv_phi);
1503
1504 gcc_assert (phi_info);
1505 gcc_assert (ncopies >= 1);
1506
1507 /* Find the first insertion point in the BB. */
1508 si = bsi_after_labels (bb);
1509
1510 if (INTEGRAL_TYPE_P (scalar_type))
1511 step_expr = build_int_cst (scalar_type, 0);
1512 else
1513 step_expr = build_real (scalar_type, dconst0);
1514
1515 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1516 if (nested_in_vect_loop_p (loop, iv_phi))
1517 {
1518 nested_in_vect_loop = true;
1519 iv_loop = loop->inner;
1520 }
1521 else
1522 iv_loop = loop;
1523 gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
1524
1525 latch_e = loop_latch_edge (iv_loop);
1526 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1527
1528 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1529 gcc_assert (access_fn);
1530 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1531 &init_expr, &step_expr);
1532 gcc_assert (ok);
1533 pe = loop_preheader_edge (iv_loop);
1534
1535 /* Create the vector that holds the initial_value of the induction. */
1536 if (nested_in_vect_loop)
1537 {
1538       /* iv_loop is nested in the loop to be vectorized.  init_expr has already
1539          been created during vectorization of previous stmts; we obtain it from
1540 the STMT_VINFO_VEC_STMT of the defining stmt. */
1541 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1542 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1543 }
1544 else
1545 {
1546 /* iv_loop is the loop to be vectorized. Create:
1547 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1548 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1549 add_referenced_var (new_var);
1550
1551 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1552 if (stmts)
1553 {
1554 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1555 gcc_assert (!new_bb);
1556 }
1557
1558 t = NULL_TREE;
1559 t = tree_cons (NULL_TREE, init_expr, t);
1560 for (i = 1; i < nunits; i++)
1561 {
1562 tree tmp;
1563
1564 /* Create: new_name_i = new_name + step_expr */
1565 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1566 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1567 new_name = make_ssa_name (new_var, init_stmt);
1568 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1569
1570 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1571 gcc_assert (!new_bb);
1572
1573 if (vect_print_dump_info (REPORT_DETAILS))
1574 {
1575 fprintf (vect_dump, "created new init_stmt: ");
1576 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1577 }
1578 t = tree_cons (NULL_TREE, new_name, t);
1579 }
1580 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1581 vec = build_constructor_from_list (vectype, nreverse (t));
1582 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1583 }
1584
1585
1586 /* Create the vector that holds the step of the induction. */
1587 if (nested_in_vect_loop)
1588 /* iv_loop is nested in the loop to be vectorized. Generate:
1589 vec_step = [S, S, S, S] */
1590 new_name = step_expr;
1591 else
1592 {
1593 /* iv_loop is the loop to be vectorized. Generate:
1594 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1595 expr = build_int_cst (scalar_type, vf);
1596 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1597 }
1598
1599 t = NULL_TREE;
1600 for (i = 0; i < nunits; i++)
1601 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1602 vec = build_constructor_from_list (vectype, t);
1603 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1604
1605
1606 /* Create the following def-use cycle:
1607 loop prolog:
1608 vec_init = ...
1609 vec_step = ...
1610 loop:
1611 vec_iv = PHI <vec_init, vec_loop>
1612 ...
1613 STMT
1614 ...
1615 vec_loop = vec_iv + vec_step; */
1616
1617 /* Create the induction-phi that defines the induction-operand. */
1618 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1619 add_referenced_var (vec_dest);
1620 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1621 set_stmt_info (get_stmt_ann (induction_phi),
1622 new_stmt_vec_info (induction_phi, loop_vinfo));
1623 induc_def = PHI_RESULT (induction_phi);
1624
1625 /* Create the iv update inside the loop */
1626 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1627 build2 (PLUS_EXPR, vectype,
1628 induc_def, vec_step));
1629 vec_def = make_ssa_name (vec_dest, new_stmt);
1630 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1631 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1632 set_stmt_info (get_stmt_ann (new_stmt),
1633 new_stmt_vec_info (new_stmt, loop_vinfo));
1634
1635 /* Set the arguments of the phi node: */
1636 add_phi_arg (induction_phi, vec_init, pe);
1637 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1638
1639
1640 /* In case the vectorization factor (VF) is bigger than the number
1641 of elements that we can fit in a vectype (nunits), we have to generate
1642 more than one vector stmt - i.e. - we need to "unroll" the
1643 vector stmt by a factor VF/nunits. For more details see documentation
1644 in vectorizable_operation. */
1645
1646 if (ncopies > 1)
1647 {
1648 stmt_vec_info prev_stmt_vinfo;
1649 /* FORNOW. This restriction should be relaxed. */
1650 gcc_assert (!nested_in_vect_loop);
1651
1652 /* Create the vector that holds the step of the induction. */
1653 expr = build_int_cst (scalar_type, nunits);
1654 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1655 t = NULL_TREE;
1656 for (i = 0; i < nunits; i++)
1657 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1658 vec = build_constructor_from_list (vectype, t);
1659 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1660
1661 vec_def = induc_def;
1662 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1663 for (i = 1; i < ncopies; i++)
1664 {
1665 tree tmp;
1666
1667 /* vec_i = vec_prev + vec_step */
1668 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1669 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1670 vec_def = make_ssa_name (vec_dest, new_stmt);
1671 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1672 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1673 set_stmt_info (get_stmt_ann (new_stmt),
1674 new_stmt_vec_info (new_stmt, loop_vinfo));
1675 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1676 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1677 }
1678 }
1679
1680 if (nested_in_vect_loop)
1681 {
1682 /* Find the loop-closed exit-phi of the induction, and record
1683 the final vector of induction results: */
1684 exit_phi = NULL;
1685 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1686 {
1687 if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
1688 {
1689 exit_phi = USE_STMT (use_p);
1690 break;
1691 }
1692 }
1693 if (exit_phi)
1694 {
1695 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1696 /* FORNOW. Currently not supporting the case that an inner-loop induction
1697 is not used in the outer-loop (i.e. only outside the outer-loop). */
1698 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1699 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1700
1701 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1702 if (vect_print_dump_info (REPORT_DETAILS))
1703 {
1704 fprintf (vect_dump, "vector of inductions after inner-loop:");
1705 print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
1706 }
1707 }
1708 }
1709
1710
1711 if (vect_print_dump_info (REPORT_DETAILS))
1712 {
1713 fprintf (vect_dump, "transform induction: created def-use cycle:");
1714 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1715 fprintf (vect_dump, "\n");
1716 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1717 }
1718
1719 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1720 return induc_def;
1721 }
1722
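/* For example, for an induction with init_expr X = 0 and step_expr S = 2,
   vectorized with nunits = 4 and VF = 4 (so ncopies = 1), the code above
   generates:

       loop prolog:
         vec_init = {0, 2, 4, 6}     # [X, X+S, X+2*S, X+3*S]
         vec_step = {8, 8, 8, 8}     # [VF*S, VF*S, VF*S, VF*S]
       loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         vec_loop = vec_iv + vec_step

   so vec_iv evaluates to {0,2,4,6} in the first vector iteration,
   {8,10,12,14} in the second, and so on.  */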
1723
1724 /* Function vect_get_vec_def_for_operand.
1725
1726 OP is an operand in STMT. This function returns a (vector) def that will be
1727 used in the vectorized stmt for STMT.
1728
1729 In the case that OP is an SSA_NAME which is defined in the loop, then
1730 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1731
1732 In case OP is an invariant or constant, a new stmt that creates a vector def
1733 needs to be introduced. */
1734
1735 static tree
1736 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1737 {
1738 tree vec_oprnd;
1739 tree vec_stmt;
1740 tree def_stmt;
1741 stmt_vec_info def_stmt_info = NULL;
1742 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1743 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1744 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1745 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1746 tree vec_inv;
1747 tree vec_cst;
1748 tree t = NULL_TREE;
1749 tree def;
1750 int i;
1751 enum vect_def_type dt;
1752 bool is_simple_use;
1753 tree vector_type;
1754
1755 if (vect_print_dump_info (REPORT_DETAILS))
1756 {
1757 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1758 print_generic_expr (vect_dump, op, TDF_SLIM);
1759 }
1760
1761 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1762 gcc_assert (is_simple_use);
1763 if (vect_print_dump_info (REPORT_DETAILS))
1764 {
1765 if (def)
1766 {
1767 fprintf (vect_dump, "def = ");
1768 print_generic_expr (vect_dump, def, TDF_SLIM);
1769 }
1770 if (def_stmt)
1771 {
1772 fprintf (vect_dump, " def_stmt = ");
1773 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1774 }
1775 }
1776
1777 switch (dt)
1778 {
1779 /* Case 1: operand is a constant. */
1780 case vect_constant_def:
1781 {
1782 if (scalar_def)
1783 *scalar_def = op;
1784
1785 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1786 if (vect_print_dump_info (REPORT_DETAILS))
1787 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1788
1789 for (i = nunits - 1; i >= 0; --i)
1790 {
1791 t = tree_cons (NULL_TREE, op, t);
1792 }
1793 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1794 vec_cst = build_vector (vector_type, t);
1795
1796 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1797 }
1798
1799 /* Case 2: operand is defined outside the loop - loop invariant. */
1800 case vect_invariant_def:
1801 {
1802 if (scalar_def)
1803 *scalar_def = def;
1804
1805 /* Create 'vec_inv = {inv,inv,..,inv}' */
1806 if (vect_print_dump_info (REPORT_DETAILS))
1807 fprintf (vect_dump, "Create vector_inv.");
1808
1809 for (i = nunits - 1; i >= 0; --i)
1810 {
1811 t = tree_cons (NULL_TREE, def, t);
1812 }
1813
1814 /* FIXME: use build_constructor directly. */
1815 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1816 vec_inv = build_constructor_from_list (vector_type, t);
1817 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1818 }
1819
1820 /* Case 3: operand is defined inside the loop. */
1821 case vect_loop_def:
1822 {
1823 if (scalar_def)
1824 *scalar_def = def_stmt;
1825
1826 /* Get the def from the vectorized stmt. */
1827 def_stmt_info = vinfo_for_stmt (def_stmt);
1828 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1829 gcc_assert (vec_stmt);
1830 if (TREE_CODE (vec_stmt) == PHI_NODE)
1831 vec_oprnd = PHI_RESULT (vec_stmt);
1832 else
1833 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1834 return vec_oprnd;
1835 }
1836
1837 /* Case 4: operand is defined by a loop header phi - reduction */
1838 case vect_reduction_def:
1839 {
1840 struct loop *loop;
1841
1842 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1843 loop = (bb_for_stmt (def_stmt))->loop_father;
1844
1845 /* Get the def before the loop */
1846 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1847 return get_initial_def_for_reduction (stmt, op, scalar_def);
1848 }
1849
1850 /* Case 5: operand is defined by loop-header phi - induction. */
1851 case vect_induction_def:
1852 {
1853 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1854
1855 /* Get the def from the vectorized stmt. */
1856 def_stmt_info = vinfo_for_stmt (def_stmt);
1857 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1858 gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
1859 vec_oprnd = PHI_RESULT (vec_stmt);
1860 return vec_oprnd;
1861 }
1862
1863 default:
1864 gcc_unreachable ();
1865 }
1866 }
1867
1868
1869 /* Function vect_get_vec_def_for_stmt_copy
1870
1871 Return a vector-def for an operand. This function is used when the
1872 vectorized stmt to be created (by the caller to this function) is a "copy"
1873 created in case the vectorized result cannot fit in one vector, and several
1874 copies of the vector-stmt are required. In this case the vector-def is
1875 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1876 of the stmt that defines VEC_OPRND.
1877 DT is the type of the vector def VEC_OPRND.
1878
1879 Context:
1880 In case the vectorization factor (VF) is bigger than the number
1881 of elements that can fit in a vectype (nunits), we have to generate
1882 more than one vector stmt to vectorize the scalar stmt. This situation
1883 arises when there are multiple data-types operated upon in the loop; the
1884 smallest data-type determines the VF, and as a result, when vectorizing
1885 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1886 vector stmt (each computing a vector of 'nunits' results, and together
1887 computing 'VF' results in each iteration). This function is called when
1888 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1889 which VF=16 and nunits=4, so the number of copies required is 4):
1890
1891 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1892
1893 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1894 VS1.1: vx.1 = memref1 VS1.2
1895 VS1.2: vx.2 = memref2 VS1.3
1896 VS1.3: vx.3 = memref3
1897
1898 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1899 VSnew.1: vz1 = vx.1 + ... VSnew.2
1900 VSnew.2: vz2 = vx.2 + ... VSnew.3
1901 VSnew.3: vz3 = vx.3 + ...
1902
1903 The vectorization of S1 is explained in vectorizable_load.
1904 The vectorization of S2:
1905 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1906 the function 'vect_get_vec_def_for_operand' is called to
1907 get the relevant vector-def for each operand of S2. For operand x it
1908 returns the vector-def 'vx.0'.
1909
1910 To create the remaining copies of the vector-stmt (VSnew.j), this
1911 function is called to get the relevant vector-def for each operand. It is
1912 obtained from the respective VS1.j stmt, which is recorded in the
1913 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1914
1915 For example, to obtain the vector-def 'vx.1' in order to create the
1916 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1917 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1918 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1919 and return its def ('vx.1').
1920 Overall, to create the above sequence this function will be called 3 times:
1921 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1922 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1923 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1924
1925 static tree
1926 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1927 {
1928 tree vec_stmt_for_operand;
1929 stmt_vec_info def_stmt_info;
1930
1931 /* Do nothing; can reuse same def. */
1932 if (dt == vect_invariant_def || dt == vect_constant_def)
1933 return vec_oprnd;
1934
1935 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1936 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1937 gcc_assert (def_stmt_info);
1938 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1939 gcc_assert (vec_stmt_for_operand);
1940 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1941 return vec_oprnd;
1942 }
1943
1944
1945 /* Get vectorized definitions for the operands to create a copy of an original
1946 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1947
1948 static void
1949 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1950 VEC(tree,heap) **vec_oprnds0,
1951 VEC(tree,heap) **vec_oprnds1)
1952 {
1953 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1954
1955 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1956 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1957
1958 if (vec_oprnds1 && *vec_oprnds1)
1959 {
1960 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1961 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1962 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1963 }
1964 }
1965
1966
1967 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
1968
1969 static void
1970 vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
1971 VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
1972 {
1973 if (slp_node)
1974 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1975 else
1976 {
1977 tree vec_oprnd;
1978
1979 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1980 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1981 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1982
1983 if (op1)
1984 {
1985 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1986 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1987 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1988 }
1989 }
1990 }
1991
1992
1993 /* Function vect_finish_stmt_generation.
1994
1995 Insert a new stmt. */
1996
1997 static void
1998 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1999 block_stmt_iterator *bsi)
2000 {
2001 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2002 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2003
2004 gcc_assert (stmt == bsi_stmt (*bsi));
2005 gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
2006
2007 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2008
2009 set_stmt_info (get_stmt_ann (vec_stmt),
2010 new_stmt_vec_info (vec_stmt, loop_vinfo));
2011
2012 if (vect_print_dump_info (REPORT_DETAILS))
2013 {
2014 fprintf (vect_dump, "add new stmt: ");
2015 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2016 }
2017
2018 /* Make sure bsi points to the stmt that is being vectorized. */
2019 gcc_assert (stmt == bsi_stmt (*bsi));
2020
2021 #ifdef USE_MAPPED_LOCATION
2022 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2023 #else
2024 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2025 #endif
2026 }
2027
2028
2029 /* Function get_initial_def_for_reduction
2030
2031 Input:
2032 STMT - a stmt that performs a reduction operation in the loop.
2033 INIT_VAL - the initial value of the reduction variable
2034
2035 Output:
2036 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2037 of the reduction (used for adjusting the epilog - see below).
2038 Return a vector variable, initialized according to the operation that STMT
2039 performs. This vector will be used as the initial value of the
2040 vector of partial results.
2041
2042 Option1 (adjust in epilog): Initialize the vector as follows:
2043 add: [0,0,...,0,0]
2044 mult: [1,1,...,1,1]
2045 min/max: [init_val,init_val,..,init_val,init_val]
2046 bit and/or: [init_val,init_val,..,init_val,init_val]
2047 and when necessary (e.g. add/mult case) let the caller know
2048 that it needs to adjust the result by init_val.
2049
2050 Option2: Initialize the vector as follows:
2051 add: [0,0,...,0,init_val]
2052 mult: [1,1,...,1,init_val]
2053 min/max: [init_val,init_val,...,init_val]
2054 bit and/or: [init_val,init_val,...,init_val]
2055 and no adjustments are needed.
2056
2057 For example, for the following code:
2058
2059 s = init_val;
2060 for (i=0;i<n;i++)
2061 s = s + a[i];
2062
2063 STMT is 's = s + a[i]', and the reduction variable is 's'.
2064 For a vector of 4 units, we want to return either [0,0,0,init_val],
2065 or [0,0,0,0] and let the caller know that it needs to adjust
2066 the result at the end by 'init_val'.
2067
2068 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2069 initialization vector is simpler (same element in all entries).
2070 A cost model should help decide between these two schemes. */
2071
2072 static tree
2073 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
2074 {
2075 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2076 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2077 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2078 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2079 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2080 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2081 tree type = TREE_TYPE (init_val);
2082 tree vecdef;
2083 tree def_for_init;
2084 tree init_def;
2085 tree t = NULL_TREE;
2086 int i;
2087 tree vector_type;
2088 bool nested_in_vect_loop = false;
2089
2090 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2091 if (nested_in_vect_loop_p (loop, stmt))
2092 nested_in_vect_loop = true;
2093 else
2094 gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
2095
2096 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2097
2098 switch (code)
2099 {
2100 case WIDEN_SUM_EXPR:
2101 case DOT_PROD_EXPR:
2102 case PLUS_EXPR:
2103 if (nested_in_vect_loop)
2104 *adjustment_def = vecdef;
2105 else
2106 *adjustment_def = init_val;
2107 /* Create a vector of zeros for init_def. */
2108 if (INTEGRAL_TYPE_P (type))
2109 def_for_init = build_int_cst (type, 0);
2110 else
2111 def_for_init = build_real (type, dconst0);
2112 for (i = nunits - 1; i >= 0; --i)
2113 t = tree_cons (NULL_TREE, def_for_init, t);
2114 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2115 init_def = build_vector (vector_type, t);
2116 break;
2117
2118 case MIN_EXPR:
2119 case MAX_EXPR:
2120 *adjustment_def = NULL_TREE;
2121 init_def = vecdef;
2122 break;
2123
2124 default:
2125 gcc_unreachable ();
2126 }
2127
2128 return init_def;
2129 }
2130
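/* For example, for a summation with init_val = 10 and a vector of 4 units,
   the 'adjust in epilog' scheme used here returns the vector [0,0,0,0] and
   sets *adjustment_def to 10; the epilog code (see section 2.5 in
   vect_create_epilog_for_reduction) then adds 10 to the reduced scalar
   result. The alternative scheme would instead return [0,0,0,10] and set
   *adjustment_def to NULL_TREE.  */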
2131
2132 /* Function vect_create_epilog_for_reduction
2133
2134 Create code at the loop-epilog to finalize the result of a reduction
2135 computation.
2136
2137 VECT_DEF is a vector of partial results.
2138 REDUC_CODE is the tree-code for the epilog reduction.
2139 STMT is the scalar reduction stmt that is being vectorized.
2140 REDUCTION_PHI is the phi-node that carries the reduction computation.
2141
2142 This function:
2143 1. Creates the reduction def-use cycle: sets the arguments for
2144 REDUCTION_PHI:
2145 The loop-entry argument is the vectorized initial-value of the reduction.
2146 The loop-latch argument is VECT_DEF - the vector of partial sums.
2147 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2148 by applying the operation specified by REDUC_CODE if available, or by
2149 other means (whole-vector shifts or a scalar loop).
2150 The function also creates a new phi node at the loop exit to preserve
2151 loop-closed form, as illustrated below.
2152
2153 The flow at the entry to this function:
2154
2155 loop:
2156 vec_def = phi <null, null> # REDUCTION_PHI
2157 VECT_DEF = vector_stmt # vectorized form of STMT
2158 s_loop = scalar_stmt # (scalar) STMT
2159 loop_exit:
2160 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2161 use <s_out0>
2162 use <s_out0>
2163
2164 The above is transformed by this function into:
2165
2166 loop:
2167 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2168 VECT_DEF = vector_stmt # vectorized form of STMT
2169 s_loop = scalar_stmt # (scalar) STMT
2170 loop_exit:
2171 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2172 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2173 v_out2 = reduce <v_out1>
2174 s_out3 = extract_field <v_out2, 0>
2175 s_out4 = adjust_result <s_out3>
2176 use <s_out4>
2177 use <s_out4>
2178 */
2179
2180 static void
2181 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
2182 enum tree_code reduc_code, tree reduction_phi)
2183 {
2184 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2185 tree vectype;
2186 enum machine_mode mode;
2187 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2188 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2189 basic_block exit_bb;
2190 tree scalar_dest;
2191 tree scalar_type;
2192 tree new_phi;
2193 block_stmt_iterator exit_bsi;
2194 tree vec_dest;
2195 tree new_temp = NULL_TREE;
2196 tree new_name;
2197 tree epilog_stmt = NULL_TREE;
2198 tree new_scalar_dest, exit_phi, new_dest;
2199 tree bitsize, bitpos, bytesize;
2200 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2201 tree adjustment_def;
2202 tree vec_initial_def;
2203 tree orig_name;
2204 imm_use_iterator imm_iter;
2205 use_operand_p use_p;
2206 bool extract_scalar_result = false;
2207 tree reduction_op, expr;
2208 tree orig_stmt;
2209 tree use_stmt;
2210 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
2211 bool nested_in_vect_loop = false;
2212 int op_type;
2213 VEC(tree,heap) *phis = NULL;
2214 int i;
2215
2216 if (nested_in_vect_loop_p (loop, stmt))
2217 {
2218 loop = loop->inner;
2219 nested_in_vect_loop = true;
2220 }
2221
2222 op_type = TREE_OPERAND_LENGTH (operation);
2223 reduction_op = TREE_OPERAND (operation, op_type-1);
2224 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2225 mode = TYPE_MODE (vectype);
2226
2227 /*** 1. Create the reduction def-use cycle ***/
2228
2229 /* 1.1 set the loop-entry arg of the reduction-phi: */
2230 /* For the case of reduction, vect_get_vec_def_for_operand returns
2231 the scalar def before the loop, which defines the initial value
2232 of the reduction variable. */
2233 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2234 &adjustment_def);
2235 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
2236
2237 /* 1.2 set the loop-latch arg for the reduction-phi: */
2238 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
2239
2240 if (vect_print_dump_info (REPORT_DETAILS))
2241 {
2242 fprintf (vect_dump, "transform reduction: created def-use cycle:");
2243 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
2244 fprintf (vect_dump, "\n");
2245 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
2246 }
2247
2248
2249 /*** 2. Create epilog code
2250 The reduction epilog code operates across the elements of the vector
2251 of partial results computed by the vectorized loop.
2252 The reduction epilog code consists of:
2253 step 1: compute the scalar result in a vector (v_out2)
2254 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2255 step 3: adjust the scalar result (s_out3) if needed.
2256
2257 Step 1 can be accomplished using one of the following three schemes:
2258 (scheme 1) using reduc_code, if available.
2259 (scheme 2) using whole-vector shifts, if available.
2260 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2261 combined.
2262
2263 The overall epilog code looks like this:
2264
2265 s_out0 = phi <s_loop> # original EXIT_PHI
2266 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2267 v_out2 = reduce <v_out1> # step 1
2268 s_out3 = extract_field <v_out2, 0> # step 2
2269 s_out4 = adjust_result <s_out3> # step 3
2270
2271 (step 3 is optional, and steps 1 and 2 may be combined).
2272 Lastly, the uses of s_out0 are replaced by s_out4.
2273
2274 ***/
2275
2276 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2277 v_out1 = phi <v_loop> */
2278
2279 exit_bb = single_exit (loop)->dest;
2280 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2281 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
2282 exit_bsi = bsi_after_labels (exit_bb);
2283
2284 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2285 (i.e. when reduc_code is not available) and in the final adjustment
2286 code (if needed). Also get the original scalar reduction variable as
2287 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2288 represents a reduction pattern), the tree-code and scalar-def are
2289 taken from the original stmt that the pattern-stmt (STMT) replaces.
2290 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2291 are taken from STMT. */
2292
2293 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2294 if (!orig_stmt)
2295 {
2296 /* Regular reduction */
2297 orig_stmt = stmt;
2298 }
2299 else
2300 {
2301 /* Reduction pattern */
2302 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2303 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2305 }
2306 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2307 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
2308 scalar_type = TREE_TYPE (scalar_dest);
2309 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2310 bitsize = TYPE_SIZE (scalar_type);
2311 bytesize = TYPE_SIZE_UNIT (scalar_type);
2312
2313
2314 /* In case this is a reduction in an inner-loop while vectorizing an outer
2315 loop - we don't need to extract a single scalar result at the end of the
2316 inner-loop. The final vector of partial results will be used in the
2317 vectorized outer-loop, or reduced to a scalar result at the end of the
2318 outer-loop. */
2319 if (nested_in_vect_loop)
2320 goto vect_finalize_reduction;
2321
2322 /* 2.3 Create the reduction code, using one of the three schemes described
2323 above. */
2324
2325 if (reduc_code < NUM_TREE_CODES)
2326 {
2327 tree tmp;
2328
2329 /*** Case 1: Create:
2330 v_out2 = reduc_expr <v_out1> */
2331
2332 if (vect_print_dump_info (REPORT_DETAILS))
2333 fprintf (vect_dump, "Reduce using direct vector reduction.");
2334
2335 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2336 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2337 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2338 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2339 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2340 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2341
2342 extract_scalar_result = true;
2343 }
2344 else
2345 {
2346 enum tree_code shift_code = 0;
2347 bool have_whole_vector_shift = true;
2348 int bit_offset;
2349 int element_bitsize = tree_low_cst (bitsize, 1);
2350 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2351 tree vec_temp;
2352
2353 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2354 shift_code = VEC_RSHIFT_EXPR;
2355 else
2356 have_whole_vector_shift = false;
2357
2358 /* Regardless of whether we have a whole vector shift, if we're
2359 emulating the operation via tree-vect-generic, we don't want
2360 to use it. Only the first round of the reduction is likely
2361 to still be profitable via emulation. */
2362 /* ??? It might be better to emit a reduction tree code here, so that
2363 tree-vect-generic can expand the first round via bit tricks. */
2364 if (!VECTOR_MODE_P (mode))
2365 have_whole_vector_shift = false;
2366 else
2367 {
2368 optab optab = optab_for_tree_code (code, vectype);
2369 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2370 have_whole_vector_shift = false;
2371 }
2372
2373 if (have_whole_vector_shift)
2374 {
2375 /*** Case 2: Create:
2376 for (offset = VS/2; offset >= element_size; offset/=2)
2377 {
2378 Create: va' = vec_shift <va, offset>
2379 Create: va = vop <va, va'>
2380 } */
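/* For example, to sum a vector of four 32-bit ints {a0,a1,a2,a3} held in a
   128-bit vector (code = PLUS_EXPR), the loop below runs with offsets 64
   and 32 and generates (element positions shown schematically; the exact
   lane that ends up holding the sum depends on the target):

       va' = vec_shift <va, 64>    # va' = {a2, a3, _, _}
       va  = va + va'              # va  = {a0+a2, a1+a3, _, _}
       va' = vec_shift <va, 32>    # va' = {a1+a3, _, _, _}
       va  = va + va'              # va  = {a0+a1+a2+a3, _, _, _}

   and the scalar result is extracted from a single element afterwards.  */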
2381
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "Reduce using vector shifts");
2384
2385 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2386 new_temp = PHI_RESULT (new_phi);
2387
2388 for (bit_offset = vec_size_in_bits/2;
2389 bit_offset >= element_bitsize;
2390 bit_offset /= 2)
2391 {
2392 tree bitpos = size_int (bit_offset);
2393 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
2394 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2395 new_name = make_ssa_name (vec_dest, epilog_stmt);
2396 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2397 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2398
2399 tmp = build2 (code, vectype, new_name, new_temp);
2400 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2401 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2402 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2403 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2404 }
2405
2406 extract_scalar_result = true;
2407 }
2408 else
2409 {
2410 tree rhs;
2411
2412 /*** Case 3: Create:
2413 s = extract_field <v_out2, 0>
2414 for (offset = element_size;
2415 offset < vector_size;
2416 offset += element_size;)
2417 {
2418 Create: s' = extract_field <v_out2, offset>
2419 Create: s = op <s, s'>
2420 } */
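/* For example, to sum the same vector of four 32-bit ints {a0,a1,a2,a3}
   with scalar code, the sequence below extracts at bit offsets 0, 32, 64
   and 96 and accumulates:

       s  = extract_field <va, 0>     # s = a0
       s' = extract_field <va, 32>
       s  = s' + s                    # s = a1 + a0
       s' = extract_field <va, 64>
       s  = s' + s                    # s = a2 + a1 + a0
       s' = extract_field <va, 96>
       s  = s' + s                    # s = a3 + a2 + a1 + a0  */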
2421
2422 if (vect_print_dump_info (REPORT_DETAILS))
2423 fprintf (vect_dump, "Reduce using scalar code. ");
2424
2425 vec_temp = PHI_RESULT (new_phi);
2426 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2427 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2428 bitsize_zero_node);
2429 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2430 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2431 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2432 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2433 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2434
2435 for (bit_offset = element_bitsize;
2436 bit_offset < vec_size_in_bits;
2437 bit_offset += element_bitsize)
2438 {
2439 tree tmp;
2440 tree bitpos = bitsize_int (bit_offset);
2441 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2442 bitpos);
2443
2444 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2445 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2446 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2447 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2448 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2449
2450 tmp = build2 (code, scalar_type, new_name, new_temp);
2451 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
2452 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2453 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2454 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2455 }
2456
2457 extract_scalar_result = false;
2458 }
2459 }
2460
2461 /* 2.4 Extract the final scalar result. Create:
2462 s_out3 = extract_field <v_out2, bitpos> */
2463
2464 if (extract_scalar_result)
2465 {
2466 tree rhs;
2467
2468 gcc_assert (!nested_in_vect_loop);
2469 if (vect_print_dump_info (REPORT_DETAILS))
2470 fprintf (vect_dump, "extract scalar result");
2471
2472 if (BYTES_BIG_ENDIAN)
2473 bitpos = size_binop (MULT_EXPR,
2474 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2475 TYPE_SIZE (scalar_type));
2476 else
2477 bitpos = bitsize_zero_node;
2478
2479 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2480 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2481 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2482 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2483 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2484 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2485 }
2486
2487 vect_finalize_reduction:
2488
2489 /* 2.5 Adjust the final result by the initial value of the reduction
2490 variable. (When such adjustment is not needed, then
2491 'adjustment_def' is zero). For example, if code is PLUS we create:
2492 new_temp = loop_exit_def + adjustment_def */
2493
2494 if (adjustment_def)
2495 {
2496 if (nested_in_vect_loop)
2497 {
2498 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2499 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2500 new_dest = vect_create_destination_var (scalar_dest, vectype);
2501 }
2502 else
2503 {
2504 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2505 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2506 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2507 }
2508 epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
2509 new_temp = make_ssa_name (new_dest, epilog_stmt);
2510 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2511 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2512 }
2513
2514
2515 /* 2.6 Handle the loop-exit phi */
2516
2517 /* Replace uses of s_out0 with uses of s_out3:
2518 Find the loop-closed-use at the loop exit of the original scalar result.
2519 (The reduction result is expected to have two immediate uses - one at the
2520 latch block, and one at the loop exit). */
2521 phis = VEC_alloc (tree, heap, 10);
2522 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2523 {
2524 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
2525 {
2526 exit_phi = USE_STMT (use_p);
2527 VEC_quick_push (tree, phis, exit_phi);
2528 }
2529 }
2530 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2531 gcc_assert (!VEC_empty (tree, phis));
2532
2533 for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
2534 {
2535 if (nested_in_vect_loop)
2536 {
2537 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2538
2539 /* FORNOW. Currently not supporting the case that an inner-loop reduction
2540 is not used in the outer-loop (but only outside the outer-loop). */
2541 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2542 && !STMT_VINFO_LIVE_P (stmt_vinfo));
2543
2544 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2545 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2546 set_stmt_info (get_stmt_ann (epilog_stmt),
2547 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2548 continue;
2549 }
2550
2551 /* Replace the uses: */
2552 orig_name = PHI_RESULT (exit_phi);
2553 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2554 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2555 SET_USE (use_p, new_temp);
2556 }
2557 VEC_free (tree, heap, phis);
2558 }
2559
2560
2561 /* Function vectorizable_reduction.
2562
2563 Check if STMT performs a reduction operation that can be vectorized.
2564 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2565 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2566 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2567
2568 This function also handles reduction idioms (patterns) that have been
2569 recognized in advance during vect_pattern_recog. In this case, STMT may be
2570 of this form:
2571 X = pattern_expr (arg0, arg1, ..., X)
2572 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2573 sequence that had been detected and replaced by the pattern-stmt (STMT).
2574
2575 In some cases of reduction patterns, the type of the reduction variable X is
2576 different than the type of the other arguments of STMT.
2577 In such cases, the vectype that is used when transforming STMT into a vector
2578 stmt is different than the vectype that is used to determine the
2579 vectorization factor, because it consists of a different number of elements
2580 than the actual number of elements that are being operated upon in parallel.
2581
2582 For example, consider an accumulation of shorts into an int accumulator.
2583 On some targets it's possible to vectorize this pattern operating on 8
2584 shorts at a time (hence, the vectype for purposes of determining the
2585 vectorization factor should be V8HI); on the other hand, the vectype that
2586 is used to create the vector form is actually V4SI (the type of the result).
2587
2588 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2589 indicates the actual level of parallelism (V8HI in the example), so
2590 that the right vectorization factor is derived. This vectype
2591 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2592 be used to create the vectorized stmt. The right vectype for the vectorized
2593 stmt is obtained from the type of the result X:
2594 get_vectype_for_scalar_type (TREE_TYPE (X))
2595
2596 This means that, contrary to "regular" reductions (or "regular" stmts in
2597 general), the following equation:
2598 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2599 does *NOT* necessarily hold for reduction patterns. */
2600
2601 bool
2602 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2603 {
2604 tree vec_dest;
2605 tree scalar_dest;
2606 tree op;
2607 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2608 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2609 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2610 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2611 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2612 tree operation;
2613 enum tree_code code, orig_code, epilog_reduc_code = 0;
2614 enum machine_mode vec_mode;
2615 int op_type;
2616 optab optab, reduc_optab;
2617 tree new_temp = NULL_TREE;
2618 tree def, def_stmt;
2619 enum vect_def_type dt;
2620 tree new_phi;
2621 tree scalar_type;
2622 bool is_simple_use;
2623 tree orig_stmt;
2624 stmt_vec_info orig_stmt_info;
2625 tree expr = NULL_TREE;
2626 int i;
2627 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2628 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2629 stmt_vec_info prev_stmt_info;
2630 tree reduc_def;
2631 tree new_stmt = NULL_TREE;
2632 int j;
2633
2634 if (nested_in_vect_loop_p (loop, stmt))
2635 {
2636 loop = loop->inner;
2637 /* FORNOW. This restriction should be relaxed. */
2638 if (ncopies > 1)
2639 {
2640 if (vect_print_dump_info (REPORT_DETAILS))
2641 fprintf (vect_dump, "multiple types in nested loop.");
2642 return false;
2643 }
2644 }
2645
2646 gcc_assert (ncopies >= 1);
2647
2648 /* FORNOW: SLP not supported. */
2649 if (STMT_SLP_TYPE (stmt_info))
2650 return false;
2651
2652 /* 1. Is vectorizable reduction? */
2653
2654 /* Not supportable if the reduction variable is used in the loop. */
2655 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2656 return false;
2657
2658 /* Reductions that are not used even in an enclosing outer-loop
2659 are expected to be "live" (used out of the loop). */
2660 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2661 && !STMT_VINFO_LIVE_P (stmt_info))
2662 return false;
2663
2664 /* Make sure it was already recognized as a reduction computation. */
2665 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2666 return false;
2667
2668 /* 2. Has this been recognized as a reduction pattern?
2669
2670 Check if STMT represents a pattern that has been recognized
2671 in earlier analysis stages. For stmts that represent a pattern,
2672 the STMT_VINFO_RELATED_STMT field records the last stmt in
2673 the original sequence that constitutes the pattern. */
2674
2675 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2676 if (orig_stmt)
2677 {
2678 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2679 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2680 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2681 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2682 }
2683
2684 /* 3. Check the operands of the operation. The first operands are defined
2685 inside the loop body. The last operand is the reduction variable,
2686 which is defined by the loop-header-phi. */
2687
2688 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2689
2690 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2691 code = TREE_CODE (operation);
2692 op_type = TREE_OPERAND_LENGTH (operation);
2693 if (op_type != binary_op && op_type != ternary_op)
2694 return false;
2695 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2696 scalar_type = TREE_TYPE (scalar_dest);
2697
2698 /* All uses but the last are expected to be defined in the loop.
2699 The last use is the reduction variable. */
2700 for (i = 0; i < op_type-1; i++)
2701 {
2702 op = TREE_OPERAND (operation, i);
2703 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2704 gcc_assert (is_simple_use);
2705 if (dt != vect_loop_def
2706 && dt != vect_invariant_def
2707 && dt != vect_constant_def
2708 && dt != vect_induction_def)
2709 return false;
2710 }
2711
2712 op = TREE_OPERAND (operation, i);
2713 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2714 gcc_assert (is_simple_use);
2715 gcc_assert (dt == vect_reduction_def);
2716 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2717 if (orig_stmt)
2718 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2719 else
2720 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2721
2722 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2723 return false;
2724
2725 /* 4. Supportable by target? */
2726
2727 /* 4.1. check support for the operation in the loop */
2728 optab = optab_for_tree_code (code, vectype);
2729 if (!optab)
2730 {
2731 if (vect_print_dump_info (REPORT_DETAILS))
2732 fprintf (vect_dump, "no optab.");
2733 return false;
2734 }
2735 vec_mode = TYPE_MODE (vectype);
2736 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2737 {
2738 if (vect_print_dump_info (REPORT_DETAILS))
2739 fprintf (vect_dump, "op not supported by target.");
2740 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2741 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2742 < vect_min_worthwhile_factor (code))
2743 return false;
2744 if (vect_print_dump_info (REPORT_DETAILS))
2745 fprintf (vect_dump, "proceeding using word mode.");
2746 }
2747
2748 /* Worthwhile without SIMD support? */
2749 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2750 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2751 < vect_min_worthwhile_factor (code))
2752 {
2753 if (vect_print_dump_info (REPORT_DETAILS))
2754 fprintf (vect_dump, "not worthwhile without SIMD support.");
2755 return false;
2756 }
2757
2758 /* 4.2. Check support for the epilog operation.
2759
2760 If STMT represents a reduction pattern, then the type of the
2761 reduction variable may be different than the type of the rest
2762 of the arguments. For example, consider the case of accumulation
2763 of shorts into an int accumulator; The original code:
2764 S1: int_a = (int) short_a;
2765 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2766
2767 was replaced with:
2768 STMT: int_acc = widen_sum <short_a, int_acc>
2769
2770 This means that:
2771 1. The tree-code that is used to create the vector operation in the
2772 epilog code (that reduces the partial results) is not the
2773 tree-code of STMT, but is rather the tree-code of the original
2774 stmt from the pattern that STMT is replacing. I.e, in the example
2775 above we want to use 'widen_sum' in the loop, but 'plus' in the
2776 epilog.
2777 2. The type (mode) we use to check available target support
2778 for the vector operation to be created in the *epilog*, is
2779 determined by the type of the reduction variable (in the example
2780 above we'd check this: plus_optab[vect_int_mode]).
2781 However the type (mode) we use to check available target support
2782 for the vector operation to be created *inside the loop*, is
2783 determined by the type of the other arguments to STMT (in the
2784 example we'd check this: widen_sum_optab[vect_short_mode]).
2785
2786 This is contrary to "regular" reductions, in which the types of all
2787 the arguments are the same as the type of the reduction variable.
2788 For "regular" reductions we can therefore use the same vector type
2789 (and also the same tree-code) when generating the epilog code and
2790 when generating the code inside the loop. */
2791
2792 if (orig_stmt)
2793 {
2794 /* This is a reduction pattern: get the vectype from the type of the
2795 reduction variable, and get the tree-code from orig_stmt. */
2796 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2797 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2798 vec_mode = TYPE_MODE (vectype);
2799 }
2800 else
2801 {
2802 /* Regular reduction: the same vectype and tree-code that are used for
2803 the vector code inside the loop can also be used for the epilog code. */
2804 orig_code = code;
2805 }
2806
2807 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2808 return false;
2809 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2810 if (!reduc_optab)
2811 {
2812 if (vect_print_dump_info (REPORT_DETAILS))
2813 fprintf (vect_dump, "no optab for reduction.");
2814 epilog_reduc_code = NUM_TREE_CODES;
2815 }
2816 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
2817 {
2818 if (vect_print_dump_info (REPORT_DETAILS))
2819 fprintf (vect_dump, "reduc op not supported by target.");
2820 epilog_reduc_code = NUM_TREE_CODES;
2821 }
2822
2823 if (!vec_stmt) /* transformation not required. */
2824 {
2825 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2826 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2827 return true;
2828 }
2829
2830 /** Transform. **/
2831
2832 if (vect_print_dump_info (REPORT_DETAILS))
2833 fprintf (vect_dump, "transform reduction.");
2834
2835 /* Create the destination vector */
2836 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2837
2838 /* Create the reduction-phi that defines the reduction-operand. */
2839 new_phi = create_phi_node (vec_dest, loop->header);
2840
2841 /* In case the vectorization factor (VF) is bigger than the number
2842 of elements that we can fit in a vectype (nunits), we have to generate
2843 more than one vector stmt - i.e. - we need to "unroll" the
2844 vector stmt by a factor VF/nunits. For more details see documentation
2845 in vectorizable_operation. */
2846
2847 prev_stmt_info = NULL;
2848 for (j = 0; j < ncopies; j++)
2849 {
2850 /* Handle uses. */
2851 if (j == 0)
2852 {
2853 op = TREE_OPERAND (operation, 0);
2854 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2855 if (op_type == ternary_op)
2856 {
2857 op = TREE_OPERAND (operation, 1);
2858 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2859 }
2860
2861 /* Get the vector def for the reduction variable from the phi node */
2862 reduc_def = PHI_RESULT (new_phi);
2863 }
2864 else
2865 {
2866 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2867 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2868 if (op_type == ternary_op)
2869 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2870
2871 /* Get the vector def for the reduction variable from the vectorized
2872 reduction operation generated in the previous iteration (j-1) */
2873 reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
2874 }
2875
2876 /* Arguments are ready. Create the new vector stmt. */
2877 if (op_type == binary_op)
2878 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2879 else
2880 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2881 reduc_def);
2882 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2883 new_temp = make_ssa_name (vec_dest, new_stmt);
2884 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2885 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2886
2887 if (j == 0)
2888 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2889 else
2890 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2891 prev_stmt_info = vinfo_for_stmt (new_stmt);
2892 }
2893
2894 /* Finalize the reduction-phi (set its arguments) and create the
2895 epilog reduction code. */
2896 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2897 return true;
2898 }
2899
2900 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2901 a function declaration if the target has a vectorized version
2902 of the function, or NULL_TREE if the function cannot be vectorized. */
2903
2904 tree
2905 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2906 {
2907 tree fndecl = get_callee_fndecl (call);
2908 enum built_in_function code;
2909
2910 /* We only handle functions that do not read or clobber memory -- i.e.
2911 const or novops ones. */
2912 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2913 return NULL_TREE;
2914
2915 if (!fndecl
2916 || TREE_CODE (fndecl) != FUNCTION_DECL
2917 || !DECL_BUILT_IN (fndecl))
2918 return NULL_TREE;
2919
2920 code = DECL_FUNCTION_CODE (fndecl);
2921 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2922 vectype_in);
2923 }
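
/* For example, on a target whose builtin_vectorized_function hook maps
   BUILT_IN_SQRT to a vector square-root builtin, a call 'y = sqrt (x)'
   in the loop can be replaced by a call to that builtin operating on
   whole vectors; if the hook returns NULL_TREE the call is not
   vectorized.  */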
2924
2925 /* Function vectorizable_call.
2926
2927 Check if STMT performs a function call that can be vectorized.
2928 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2929 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2930 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2931
2932 bool
2933 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2934 {
2935 tree vec_dest;
2936 tree scalar_dest;
2937 tree operation;
2938 tree op, type;
2939 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2940 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2941 tree vectype_out, vectype_in;
2942 int nunits_in;
2943 int nunits_out;
2944 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2945 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2946 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2947 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2948 tree new_stmt;
2949 int ncopies, j, nargs;
2950 call_expr_arg_iterator iter;
2951 tree vargs;
2952 enum { NARROW, NONE, WIDEN } modifier;
2953
2954 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2955 return false;
2956
2957 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2958 return false;
2959
2960 /* FORNOW: SLP not supported. */
2961 if (STMT_SLP_TYPE (stmt_info))
2962 return false;
2963
2964 /* FORNOW: not yet supported. */
2965 if (STMT_VINFO_LIVE_P (stmt_info))
2966 {
2967 if (vect_print_dump_info (REPORT_DETAILS))
2968 fprintf (vect_dump, "value used after loop.");
2969 return false;
2970 }
2971
2972 /* Is STMT a vectorizable call? */
2973 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2974 return false;
2975
2976 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2977 return false;
2978
2979 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2980 if (TREE_CODE (operation) != CALL_EXPR)
2981 return false;
2982
2983 /* Process function arguments. */
2984 rhs_type = NULL_TREE;
2985 nargs = 0;
2986 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2987 {
2988 /* Bail out if the function has more than two arguments; we
2989 do not have interesting builtin functions to vectorize with
2990 more than two arguments. */
2991 if (nargs >= 2)
2992 return false;
2993
2994 /* We can only handle calls with arguments of the same type. */
2995 if (rhs_type
2996 && rhs_type != TREE_TYPE (op))
2997 {
2998 if (vect_print_dump_info (REPORT_DETAILS))
2999 fprintf (vect_dump, "argument types differ.");
3000 return false;
3001 }
3002 rhs_type = TREE_TYPE (op);
3003
3004 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
3005 {
3006 if (vect_print_dump_info (REPORT_DETAILS))
3007 fprintf (vect_dump, "use not simple.");
3008 return false;
3009 }
3010
3011 ++nargs;
3012 }
3013
3014 /* No arguments is also not good. */
3015 if (nargs == 0)
3016 return false;
3017
3018 vectype_in = get_vectype_for_scalar_type (rhs_type);
3019 if (!vectype_in)
3020 return false;
3021 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3022
3023 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
3024 vectype_out = get_vectype_for_scalar_type (lhs_type);
3025 if (!vectype_out)
3026 return false;
3027 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3028
3029 /* FORNOW */
3030 if (nunits_in == nunits_out / 2)
3031 modifier = NARROW;
3032 else if (nunits_out == nunits_in)
3033 modifier = NONE;
3034 else if (nunits_out == nunits_in / 2)
3035 modifier = WIDEN;
3036 else
3037 return false;
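/* For example, if the argument type gives a vectype_in of 4 units and the
   result type gives a vectype_out of 8 units, modifier is NARROW (two
   input vectors are consumed per output vector; see the NARROW case
   below). Equal unit counts give NONE, and nunits_out == nunits_in / 2
   gives WIDEN, which no current target implements.  */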
3038
3039 /* For now, we only vectorize functions if a target specific builtin
3040 is available. TODO -- in some cases, it might be profitable to
3041 insert the calls for pieces of the vector, in order to be able
3042 to vectorize other operations in the loop. */
3043 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
3044 if (fndecl == NULL_TREE)
3045 {
3046 if (vect_print_dump_info (REPORT_DETAILS))
3047 fprintf (vect_dump, "function is not vectorizable.");
3048
3049 return false;
3050 }
3051
3052 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3053
3054 if (modifier == NARROW)
3055 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3056 else
3057 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3058
3059 /* Sanity check: make sure that at least one copy of the vectorized stmt
3060 needs to be generated. */
3061 gcc_assert (ncopies >= 1);
3062
3063 /* FORNOW. This restriction should be relaxed. */
3064 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3065 {
3066 if (vect_print_dump_info (REPORT_DETAILS))
3067 fprintf (vect_dump, "multiple types in nested loop.");
3068 return false;
3069 }
3070
3071 if (!vec_stmt) /* transformation not required. */
3072 {
3073 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3074 if (vect_print_dump_info (REPORT_DETAILS))
3075 fprintf (vect_dump, "=== vectorizable_call ===");
3076 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3077 return true;
3078 }
3079
3080 /** Transform. **/
3081
3082 if (vect_print_dump_info (REPORT_DETAILS))
3083 fprintf (vect_dump, "transform operation.");
3084
3085 /* FORNOW. This restriction should be relaxed. */
3086 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3087 {
3088 if (vect_print_dump_info (REPORT_DETAILS))
3089 fprintf (vect_dump, "multiple types in nested loop.");
3090 return false;
3091 }
3092
3093 /* Handle def. */
3094 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3095 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3096
3097 prev_stmt_info = NULL;
3098 switch (modifier)
3099 {
3100 case NONE:
3101 for (j = 0; j < ncopies; ++j)
3102 {
3103 /* Build argument list for the vectorized call. */
3104 /* FIXME: Rewrite this so that it doesn't
3105 construct a temporary list. */
3106 vargs = NULL_TREE;
3107 nargs = 0;
3108 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3109 {
3110 if (j == 0)
3111 vec_oprnd0
3112 = vect_get_vec_def_for_operand (op, stmt, NULL);
3113 else
3114 vec_oprnd0
3115 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3116
3117 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3118
3119 ++nargs;
3120 }
3121 vargs = nreverse (vargs);
3122
3123 rhs = build_function_call_expr (fndecl, vargs);
3124 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3125 new_temp = make_ssa_name (vec_dest, new_stmt);
3126 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3127
3128 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3129
3130 if (j == 0)
3131 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3132 else
3133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3134
3135 prev_stmt_info = vinfo_for_stmt (new_stmt);
3136 }
3137
3138 break;
3139
3140 case NARROW:
3141 for (j = 0; j < ncopies; ++j)
3142 {
3143 /* Build argument list for the vectorized call. */
3144 /* FIXME: Rewrite this so that it doesn't
3145 construct a temporary list. */
3146 vargs = NULL_TREE;
3147 nargs = 0;
3148 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3149 {
3150 if (j == 0)
3151 {
3152 vec_oprnd0
3153 = vect_get_vec_def_for_operand (op, stmt, NULL);
3154 vec_oprnd1
3155 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3156 }
3157 else
3158 {
3159 vec_oprnd0
3160 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3161 vec_oprnd1
3162 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3163 }
3164
3165 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3166 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
3167
3168 ++nargs;
3169 }
3170 vargs = nreverse (vargs);
3171
3172 rhs = build_function_call_expr (fndecl, vargs);
3173 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3174 new_temp = make_ssa_name (vec_dest, new_stmt);
3175 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3176
3177 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3178
3179 if (j == 0)
3180 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3181 else
3182 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3183
3184 prev_stmt_info = vinfo_for_stmt (new_stmt);
3185 }
3186
3187 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3188
3189 break;
3190
3191 case WIDEN:
3192 /* No current target implements this case. */
3193 return false;
3194 }
3195
3196 /* The call in STMT might prevent it from being removed in dce.
3197 However, we cannot remove it here, due to the way the ssa name
3198 it defines is mapped to the new definition. So just replace the
3199 rhs of the statement with something harmless. */
3200 type = TREE_TYPE (scalar_dest);
3201 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
3202 update_stmt (stmt);
3203
3204 return true;
3205 }
3206
3207
3208 /* Function vect_gen_widened_results_half
3209
3210 Create a vector stmt whose code, type, number of arguments, and result
3211 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
3212 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3213 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3214 needs to be created (DECL is a function-decl of a target-builtin).
3215 STMT is the original scalar stmt that we are vectorizing. */
3216
3217 static tree
3218 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
3219 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3220 tree vec_dest, block_stmt_iterator *bsi,
3221 tree stmt)
3222 {
3223 tree expr;
3224 tree new_stmt;
3225 tree new_temp;
3226 tree sym;
3227 ssa_op_iter iter;
3228
3229 /* Generate half of the widened result: */
3230 if (code == CALL_EXPR)
3231 {
3232 /* Target specific support */
3233 if (op_type == binary_op)
3234 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
3235 else
3236 expr = build_call_expr (decl, 1, vec_oprnd0);
3237 }
3238 else
3239 {
3240 /* Generic support */
3241 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3242 if (op_type == binary_op)
3243 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
3244 else
3245 expr = build1 (code, vectype, vec_oprnd0);
3246 }
3247 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3248 new_temp = make_ssa_name (vec_dest, new_stmt);
3249 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3250 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3251
3252 if (code == CALL_EXPR)
3253 {
3254 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3255 {
3256 if (TREE_CODE (sym) == SSA_NAME)
3257 sym = SSA_NAME_VAR (sym);
3258 mark_sym_for_renaming (sym);
3259 }
3260 }
3261
3262 return new_stmt;
3263 }
3264
3265
3266 /* Check if STMT performs a conversion operation that can be vectorized.
3267 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3268 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3269 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
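/* For illustration only (a sketch, not tied to any particular target):
   on a target with 128-bit vectors, a stmt like
       S: f_1 = (float) i_2;     (int -> float, V4SI in, V4SF out)
   has nunits_in == nunits_out, so MODIFIER == NONE and the target's
   builtin_conversion hook is used, whereas
       S: d_1 = (double) i_2;    (int -> double, V4SI in, V2DF out)
   has nunits_out == nunits_in / 2, so MODIFIER == WIDEN and two vector
   stmts are generated for each copy. */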
3270
3271 bool
3272 vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
3273 tree *vec_stmt, slp_tree slp_node)
3274 {
3275 tree vec_dest;
3276 tree scalar_dest;
3277 tree operation;
3278 tree op0;
3279 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3280 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3281 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3282 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3283 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3284 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3285 tree new_temp;
3286 tree def, def_stmt;
3287 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3288 tree new_stmt = NULL_TREE;
3289 stmt_vec_info prev_stmt_info;
3290 int nunits_in;
3291 int nunits_out;
3292 tree vectype_out, vectype_in;
3293 int ncopies, j;
3294 tree expr;
3295 tree rhs_type, lhs_type;
3296 tree builtin_decl;
3297 enum { NARROW, NONE, WIDEN } modifier;
3298 int i;
3299 VEC(tree,heap) *vec_oprnds0 = NULL;
3300 tree vop0;
3301
3302 /* Is STMT a vectorizable conversion? */
3303
3304 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3305 return false;
3306
3307 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3308 return false;
3309
3310 if (STMT_VINFO_LIVE_P (stmt_info))
3311 {
3312 /* FORNOW: not yet supported. */
3313 if (vect_print_dump_info (REPORT_DETAILS))
3314 fprintf (vect_dump, "value used after loop.");
3315 return false;
3316 }
3317
3318 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3319 return false;
3320
3321 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3322 return false;
3323
3324 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3325 code = TREE_CODE (operation);
3326 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3327 return false;
3328
3329 /* Check types of lhs and rhs. */
3330 op0 = TREE_OPERAND (operation, 0);
3331 rhs_type = TREE_TYPE (op0);
3332 vectype_in = get_vectype_for_scalar_type (rhs_type);
3333 if (!vectype_in)
3334 return false;
3335 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3336
3337 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3338 lhs_type = TREE_TYPE (scalar_dest);
3339 vectype_out = get_vectype_for_scalar_type (lhs_type);
3340 if (!vectype_out)
3341 return false;
3342 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3343
3344 /* FORNOW */
3345 if (nunits_in == nunits_out / 2)
3346 modifier = NARROW;
3347 else if (nunits_out == nunits_in)
3348 modifier = NONE;
3349 else if (nunits_out == nunits_in / 2)
3350 modifier = WIDEN;
3351 else
3352 return false;
3353
3354 if (modifier == NONE)
3355 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3356
3357 /* Bail out if the types are both integral or both non-integral. */
3358 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3359 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
3360 return false;
3361
3362 if (modifier == NARROW)
3363 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3364 else
3365 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3366
3367 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3368 this, so we can safely override NCOPIES with 1 here. */
3369 if (slp_node)
3370 ncopies = 1;
3371
3372 /* Sanity check: make sure that at least one copy of the vectorized stmt
3373 needs to be generated. */
3374 gcc_assert (ncopies >= 1);
3375
3376 /* FORNOW. This restriction should be relaxed. */
3377 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3378 {
3379 if (vect_print_dump_info (REPORT_DETAILS))
3380 fprintf (vect_dump, "multiple types in nested loop.");
3381 return false;
3382 }
3383
3384 /* Check the operands of the operation. */
3385 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3386 {
3387 if (vect_print_dump_info (REPORT_DETAILS))
3388 fprintf (vect_dump, "use not simple.");
3389 return false;
3390 }
3391
3392 /* Supportable by target? */
3393 if ((modifier == NONE
3394 && !targetm.vectorize.builtin_conversion (code, vectype_in))
3395 || (modifier == WIDEN
3396 && !supportable_widening_operation (code, stmt, vectype_in,
3397 &decl1, &decl2,
3398 &code1, &code2))
3399 || (modifier == NARROW
3400 && !supportable_narrowing_operation (code, stmt, vectype_in,
3401 &code1)))
3402 {
3403 if (vect_print_dump_info (REPORT_DETAILS))
3404 fprintf (vect_dump, "op not supported by target.");
3405 return false;
3406 }
3407
3408 if (modifier != NONE)
3409 {
3410 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3411 /* FORNOW: SLP not supported. */
3412 if (STMT_SLP_TYPE (stmt_info))
3413 return false;
3414 }
3415
3416 if (!vec_stmt) /* transformation not required. */
3417 {
3418 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3419 return true;
3420 }
3421
3422 /** Transform. **/
3423 if (vect_print_dump_info (REPORT_DETAILS))
3424 fprintf (vect_dump, "transform conversion.");
3425
3426 /* Handle def. */
3427 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3428
3429 if (modifier == NONE && !slp_node)
3430 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3431
3432 prev_stmt_info = NULL;
3433 switch (modifier)
3434 {
3435 case NONE:
3436 for (j = 0; j < ncopies; j++)
3437 {
3438 tree sym;
3439 ssa_op_iter iter;
3440
3441 if (j == 0)
3442 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3443 else
3444 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3445
3446 builtin_decl =
3447 targetm.vectorize.builtin_conversion (code, vectype_in);
3448 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3449 {
3450 new_stmt = build_call_expr (builtin_decl, 1, vop0);
3451
3452 /* Arguments are ready. Create the new vector stmt. */
3453 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3454 new_temp = make_ssa_name (vec_dest, new_stmt);
3455 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3456 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3457 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3458 SSA_OP_ALL_VIRTUALS)
3459 {
3460 if (TREE_CODE (sym) == SSA_NAME)
3461 sym = SSA_NAME_VAR (sym);
3462 mark_sym_for_renaming (sym);
3463 }
3464 if (slp_node)
3465 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3466 }
3467
3468 if (j == 0)
3469 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3470 else
3471 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3472 prev_stmt_info = vinfo_for_stmt (new_stmt);
3473 }
3474 break;
3475
3476 case WIDEN:
3477 /* In case the vectorization factor (VF) is bigger than the number
3478 of elements that we can fit in a vectype (nunits), we have to
3479 generate more than one vector stmt - i.e - we need to "unroll"
3480 the vector stmt by a factor VF/nunits. */
3481 for (j = 0; j < ncopies; j++)
3482 {
3483 if (j == 0)
3484 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3485 else
3486 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3487
3488 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3489
3490 /* Generate first half of the widened result: */
3491 new_stmt
3492 = vect_gen_widened_results_half (code1, vectype_out, decl1,
3493 vec_oprnd0, vec_oprnd1,
3494 unary_op, vec_dest, bsi, stmt);
3495 if (j == 0)
3496 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3497 else
3498 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3499 prev_stmt_info = vinfo_for_stmt (new_stmt);
3500
3501 /* Generate second half of the widened result: */
3502 new_stmt
3503 = vect_gen_widened_results_half (code2, vectype_out, decl2,
3504 vec_oprnd0, vec_oprnd1,
3505 unary_op, vec_dest, bsi, stmt);
3506 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3507 prev_stmt_info = vinfo_for_stmt (new_stmt);
3508 }
3509 break;
3510
3511 case NARROW:
3512 /* In case the vectorization factor (VF) is bigger than the number
3513 of elements that we can fit in a vectype (nunits), we have to
3514 generate more than one vector stmt - i.e - we need to "unroll"
3515 the vector stmt by a factor VF/nunits. */
3516 for (j = 0; j < ncopies; j++)
3517 {
3518 /* Handle uses. */
3519 if (j == 0)
3520 {
3521 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3522 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3523 }
3524 else
3525 {
3526 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3527 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3528 }
3529
3530 /* Arguments are ready. Create the new vector stmt. */
3531 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3532 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3533 new_temp = make_ssa_name (vec_dest, new_stmt);
3534 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3535 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3536
3537 if (j == 0)
3538 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3539 else
3540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3541
3542 prev_stmt_info = vinfo_for_stmt (new_stmt);
3543 }
3544
3545 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3546 }
3547
3548 return true;
3549 }
3550
3551
3552 /* Function vectorizable_assignment.
3553
3554 Check if STMT performs an assignment (copy) that can be vectorized.
3555 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3556 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3557 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3558
3559 bool
3560 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3561 slp_tree slp_node)
3562 {
3563 tree vec_dest;
3564 tree scalar_dest;
3565 tree op;
3566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3567 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3568 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3569 tree new_temp;
3570 tree def, def_stmt;
3571 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3572 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3573 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3574 int i;
3575 VEC(tree,heap) *vec_oprnds = NULL;
3576 tree vop;
3577
3578 gcc_assert (ncopies >= 1);
3579 if (ncopies > 1)
3580 return false; /* FORNOW */
3581
3582 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3583 return false;
3584
3585 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3586 return false;
3587
3588 /* FORNOW: not yet supported. */
3589 if (STMT_VINFO_LIVE_P (stmt_info))
3590 {
3591 if (vect_print_dump_info (REPORT_DETAILS))
3592 fprintf (vect_dump, "value used after loop.");
3593 return false;
3594 }
3595
3596 /* Is vectorizable assignment? */
3597 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3598 return false;
3599
3600 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3601 if (TREE_CODE (scalar_dest) != SSA_NAME)
3602 return false;
3603
3604 op = GIMPLE_STMT_OPERAND (stmt, 1);
3605 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
3606 {
3607 if (vect_print_dump_info (REPORT_DETAILS))
3608 fprintf (vect_dump, "use not simple.");
3609 return false;
3610 }
3611
3612 if (!vec_stmt) /* transformation not required. */
3613 {
3614 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3615 if (vect_print_dump_info (REPORT_DETAILS))
3616 fprintf (vect_dump, "=== vectorizable_assignment ===");
3617 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3618 return true;
3619 }
3620
3621 /** Transform. **/
3622 if (vect_print_dump_info (REPORT_DETAILS))
3623 fprintf (vect_dump, "transform assignment.");
3624
3625 /* Handle def. */
3626 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3627
3628 /* Handle use. */
3629 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3630
3631 /* Arguments are ready. Create the new vector stmt. */
3632 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
3633 {
3634 *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
3635 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3636 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3637 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3638 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
3639
3640 if (slp_node)
3641 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
3642 }
3643
3644 VEC_free (tree, heap, vec_oprnds);
3645 return true;
3646 }
3647
3648
3649 /* Function vect_min_worthwhile_factor.
3650
3651 For a loop where we could vectorize the operation indicated by CODE,
3652 return the minimum vectorization factor that makes it worthwhile
3653 to use generic vectors. */
3654 static int
3655 vect_min_worthwhile_factor (enum tree_code code)
3656 {
3657 switch (code)
3658 {
3659 case PLUS_EXPR:
3660 case MINUS_EXPR:
3661 case NEGATE_EXPR:
3662 return 4;
3663
3664 case BIT_AND_EXPR:
3665 case BIT_IOR_EXPR:
3666 case BIT_XOR_EXPR:
3667 case BIT_NOT_EXPR:
3668 return 2;
3669
3670 default:
3671 return INT_MAX;
3672 }
3673 }
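/* For illustration (assuming generic word-mode vectors and no SIMD support):
   with a 32-bit word, four QImode additions can be emulated by one word-mode
   operation, so a PLUS_EXPR loop with vectorization factor 4 passes the
   "worthwhile" checks in vectorizable_operation, while a factor of 2 does not
   (2 < vect_min_worthwhile_factor (PLUS_EXPR), which is 4). */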
3674
3675
3676 /* Function vectorizable_induction
3677
3678 Check if PHI performs an induction computation that can be vectorized.
3679 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3680 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3681 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
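/* For illustration (a sketch of the idea, not the exact IR): an induction
   such as
       i_1 = PHI <0 (preheader), i_2 (latch)>
       i_2 = i_1 + 1;
   is, with VF == 4, given a vector phi whose initial value is {0, 1, 2, 3}
   and whose per-iteration step is {4, 4, 4, 4}; both are built by
   get_initial_def_for_induction. */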
3682
3683 bool
3684 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3685 tree *vec_stmt)
3686 {
3687 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3689 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3690 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3691 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3692 tree vec_def;
3693
3694 gcc_assert (ncopies >= 1);
3695
3696 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3697 return false;
3698
3699 /* FORNOW: SLP not supported. */
3700 if (STMT_SLP_TYPE (stmt_info))
3701 return false;
3702
3703 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3704
3705 if (STMT_VINFO_LIVE_P (stmt_info))
3706 {
3707 /* FORNOW: not yet supported. */
3708 if (vect_print_dump_info (REPORT_DETAILS))
3709 fprintf (vect_dump, "value used after loop.");
3710 return false;
3711 }
3712
3713 if (TREE_CODE (phi) != PHI_NODE)
3714 return false;
3715
3716 if (!vec_stmt) /* transformation not required. */
3717 {
3718 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3719 if (vect_print_dump_info (REPORT_DETAILS))
3720 fprintf (vect_dump, "=== vectorizable_induction ===");
3721 vect_model_induction_cost (stmt_info, ncopies);
3722 return true;
3723 }
3724
3725 /** Transform. **/
3726
3727 if (vect_print_dump_info (REPORT_DETAILS))
3728 fprintf (vect_dump, "transform induction phi.");
3729
3730 vec_def = get_initial_def_for_induction (phi);
3731 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3732 return true;
3733 }
3734
3735
3736 /* Function vectorizable_operation.
3737
3738 Check if STMT performs a binary or unary operation that can be vectorized.
3739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3740 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3741 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3742
3743 bool
3744 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3745 slp_tree slp_node)
3746 {
3747 tree vec_dest;
3748 tree scalar_dest;
3749 tree operation;
3750 tree op0, op1 = NULL;
3751 tree vec_oprnd1 = NULL_TREE;
3752 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3753 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3754 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3755 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3756 enum tree_code code;
3757 enum machine_mode vec_mode;
3758 tree new_temp;
3759 int op_type;
3760 optab optab;
3761 int icode;
3762 enum machine_mode optab_op2_mode;
3763 tree def, def_stmt;
3764 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3765 tree new_stmt = NULL_TREE;
3766 stmt_vec_info prev_stmt_info;
3767 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3768 int nunits_out;
3769 tree vectype_out;
3770 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3771 int j, i;
3772 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3773 tree vop0, vop1;
3774
3775 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3776 this, so we can safely override NCOPIES with 1 here. */
3777 if (slp_node)
3778 ncopies = 1;
3779 gcc_assert (ncopies >= 1);
3780 /* FORNOW. This restriction should be relaxed. */
3781 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3782 {
3783 if (vect_print_dump_info (REPORT_DETAILS))
3784 fprintf (vect_dump, "multiple types in nested loop.");
3785 return false;
3786 }
3787
3788 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3789 return false;
3790
3791 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3792 return false;
3793
3794 /* FORNOW: not yet supported. */
3795 if (STMT_VINFO_LIVE_P (stmt_info))
3796 {
3797 if (vect_print_dump_info (REPORT_DETAILS))
3798 fprintf (vect_dump, "value used after loop.");
3799 return false;
3800 }
3801
3802 /* Is STMT a vectorizable binary/unary operation? */
3803 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3804 return false;
3805
3806 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3807 return false;
3808
3809 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3810 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3811 if (!vectype_out)
3812 return false;
3813 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3814 if (nunits_out != nunits_in)
3815 return false;
3816
3817 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3818 code = TREE_CODE (operation);
3819
3820 /* For pointer addition, we should use the normal plus for
3821 the vector addition. */
3822 if (code == POINTER_PLUS_EXPR)
3823 code = PLUS_EXPR;
3824
3825 optab = optab_for_tree_code (code, vectype);
3826
3827 /* Support only unary or binary operations. */
3828 op_type = TREE_OPERAND_LENGTH (operation);
3829 if (op_type != unary_op && op_type != binary_op)
3830 {
3831 if (vect_print_dump_info (REPORT_DETAILS))
3832 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3833 return false;
3834 }
3835
3836 op0 = TREE_OPERAND (operation, 0);
3837 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3838 {
3839 if (vect_print_dump_info (REPORT_DETAILS))
3840 fprintf (vect_dump, "use not simple.");
3841 return false;
3842 }
3843
3844 if (op_type == binary_op)
3845 {
3846 op1 = TREE_OPERAND (operation, 1);
3847 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3848 {
3849 if (vect_print_dump_info (REPORT_DETAILS))
3850 fprintf (vect_dump, "use not simple.");
3851 return false;
3852 }
3853 }
3854
3855 /* Supportable by target? */
3856 if (!optab)
3857 {
3858 if (vect_print_dump_info (REPORT_DETAILS))
3859 fprintf (vect_dump, "no optab.");
3860 return false;
3861 }
3862 vec_mode = TYPE_MODE (vectype);
3863 icode = (int) optab_handler (optab, vec_mode)->insn_code;
3864 if (icode == CODE_FOR_nothing)
3865 {
3866 if (vect_print_dump_info (REPORT_DETAILS))
3867 fprintf (vect_dump, "op not supported by target.");
3868 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3869 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3870 < vect_min_worthwhile_factor (code))
3871 return false;
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "proceeding using word mode.");
3874 }
3875
3876 /* Worthwhile without SIMD support? */
3877 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3878 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3879 < vect_min_worthwhile_factor (code))
3880 {
3881 if (vect_print_dump_info (REPORT_DETAILS))
3882 fprintf (vect_dump, "not worthwhile without SIMD support.");
3883 return false;
3884 }
3885
3886 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3887 {
3888 /* FORNOW: not yet supported. */
3889 if (!VECTOR_MODE_P (vec_mode))
3890 return false;
3891
3892 /* Invariant argument is needed for a vector shift
3893 by a scalar shift operand. */
3894 optab_op2_mode = insn_data[icode].operand[2].mode;
3895 if (! (VECTOR_MODE_P (optab_op2_mode)
3896 || dt[1] == vect_constant_def
3897 || dt[1] == vect_invariant_def))
3898 {
3899 if (vect_print_dump_info (REPORT_DETAILS))
3900 fprintf (vect_dump, "operand mode requires invariant argument.");
3901 return false;
3902 }
3903 }
3904
3905 if (!vec_stmt) /* transformation not required. */
3906 {
3907 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3908 if (vect_print_dump_info (REPORT_DETAILS))
3909 fprintf (vect_dump, "=== vectorizable_operation ===");
3910 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3911 return true;
3912 }
3913
3914 /** Transform. **/
3915
3916 if (vect_print_dump_info (REPORT_DETAILS))
3917 fprintf (vect_dump, "transform binary/unary operation.");
3918
3919 /* Handle def. */
3920 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3921
3922 if (!slp_node)
3923 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3924 if (op_type == binary_op)
3925 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3926
3927 /* In case the vectorization factor (VF) is bigger than the number
3928 of elements that we can fit in a vectype (nunits), we have to generate
3929 more than one vector stmt - i.e - we need to "unroll" the
3930 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3931 from one copy of the vector stmt to the next, in the field
3932 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3933 stages to find the correct vector defs to be used when vectorizing
3934 stmts that use the defs of the current stmt. The example below illustrates
3935 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3936 4 vectorized stmts):
3937
3938 before vectorization:
3939 RELATED_STMT VEC_STMT
3940 S1: x = memref - -
3941 S2: z = x + 1 - -
3942
3943 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3944 there):
3945 RELATED_STMT VEC_STMT
3946 VS1_0: vx0 = memref0 VS1_1 -
3947 VS1_1: vx1 = memref1 VS1_2 -
3948 VS1_2: vx2 = memref2 VS1_3 -
3949 VS1_3: vx3 = memref3 - -
3950 S1: x = load - VS1_0
3951 S2: z = x + 1 - -
3952
3953 step2: vectorize stmt S2 (done here):
3954 To vectorize stmt S2 we first need to find the relevant vector
3955 def for the first operand 'x'. This is, as usual, obtained from
3956 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3957 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3958 relevant vector def 'vx0'. Having found 'vx0' we can generate
3959 the vector stmt VS2_0, and as usual, record it in the
3960 STMT_VINFO_VEC_STMT of stmt S2.
3961 When creating the second copy (VS2_1), we obtain the relevant vector
3962 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3963 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3964 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3965 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3966 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3967 chain of stmts and pointers:
3968 RELATED_STMT VEC_STMT
3969 VS1_0: vx0 = memref0 VS1_1 -
3970 VS1_1: vx1 = memref1 VS1_2 -
3971 VS1_2: vx2 = memref2 VS1_3 -
3972 VS1_3: vx3 = memref3 - -
3973 S1: x = load - VS1_0
3974 VS2_0: vz0 = vx0 + v1 VS2_1 -
3975 VS2_1: vz1 = vx1 + v1 VS2_2 -
3976 VS2_2: vz2 = vx2 + v1 VS2_3 -
3977 VS2_3: vz3 = vx3 + v1 - -
3978 S2: z = x + 1 - VS2_0 */
3979
3980 prev_stmt_info = NULL;
3981 for (j = 0; j < ncopies; j++)
3982 {
3983 /* Handle uses. */
3984 if (j == 0)
3985 {
3986 if (op_type == binary_op
3987 && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
3988 {
3989 /* Vector shl and shr insn patterns can be defined with scalar
3990 operand 2 (shift operand). In this case, use constant or loop
3991 invariant op1 directly, without extending it to vector mode
3992 first. */
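              /* For illustration: in a loop like
                     for (i = 0; i < n; i++)
                       a[i] = b[i] << k;
                 with loop-invariant K, the scalar K is used directly as
                 operand 2 of the vector shift pattern when that operand
                 has a scalar mode. */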
3993 optab_op2_mode = insn_data[icode].operand[2].mode;
3994 if (!VECTOR_MODE_P (optab_op2_mode))
3995 {
3996 if (vect_print_dump_info (REPORT_DETAILS))
3997 fprintf (vect_dump, "operand 1 using scalar mode.");
3998 vec_oprnd1 = op1;
3999 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
4000 }
4001 }
4002
4003 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4004 (a special case for certain kinds of vector shifts); otherwise,
4005 operand 1 should be of a vector type (the usual case). */
4006 if (op_type == binary_op && !vec_oprnd1)
4007 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4008 slp_node);
4009 else
4010 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4011 slp_node);
4012 }
4013 else
4014 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4015
4016 /* Arguments are ready. Create the new vector stmt. */
4017 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
4018 {
4019 if (op_type == binary_op)
4020 {
4021 vop1 = VEC_index (tree, vec_oprnds1, i);
4022 new_stmt = build_gimple_modify_stmt (vec_dest,
4023 build2 (code, vectype, vop0, vop1));
4024 }
4025 else
4026 new_stmt = build_gimple_modify_stmt (vec_dest,
4027 build1 (code, vectype, vop0));
4028
4029 new_temp = make_ssa_name (vec_dest, new_stmt);
4030 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4031 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4032 if (slp_node)
4033 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4034 }
4035
4036 if (j == 0)
4037 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4038 else
4039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4040 prev_stmt_info = vinfo_for_stmt (new_stmt);
4041 }
4042
4043 VEC_free (tree, heap, vec_oprnds0);
4044 if (vec_oprnds1)
4045 VEC_free (tree, heap, vec_oprnds1);
4046
4047 return true;
4048 }
4049
4050
4051 /* Function vectorizable_type_demotion
4052
4053 Check if STMT performs a binary or unary operation that involves
4054 type demotion, and if it can be vectorized.
4055 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4056 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4057 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
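/* For illustration (a sketch, target dependent): demoting int to short with
   V4SI in and V8HI out consumes two V4SI defs per generated vector stmt;
   the narrowing code returned by supportable_narrowing_operation (e.g.
   VEC_PACK_TRUNC_EXPR) packs VEC_OPRND0 and VEC_OPRND1 into a single V8HI. */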
4058
4059 bool
4060 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
4061 tree *vec_stmt)
4062 {
4063 tree vec_dest;
4064 tree scalar_dest;
4065 tree operation;
4066 tree op0;
4067 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4070 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4071 enum tree_code code, code1 = ERROR_MARK;
4072 tree new_temp;
4073 tree def, def_stmt;
4074 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4075 tree new_stmt;
4076 stmt_vec_info prev_stmt_info;
4077 int nunits_in;
4078 int nunits_out;
4079 tree vectype_out;
4080 int ncopies;
4081 int j;
4082 tree expr;
4083 tree vectype_in;
4084
4085 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4086 return false;
4087
4088 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4089 return false;
4090
4091 /* FORNOW: not yet supported. */
4092 if (STMT_VINFO_LIVE_P (stmt_info))
4093 {
4094 if (vect_print_dump_info (REPORT_DETAILS))
4095 fprintf (vect_dump, "value used after loop.");
4096 return false;
4097 }
4098
4099 /* Is STMT a vectorizable type-demotion operation? */
4100 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4101 return false;
4102
4103 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4104 return false;
4105
4106 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4107 code = TREE_CODE (operation);
4108 if (code != NOP_EXPR && code != CONVERT_EXPR)
4109 return false;
4110
4111 op0 = TREE_OPERAND (operation, 0);
4112 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4113 if (!vectype_in)
4114 return false;
4115 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4116
4117 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4118 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4119 if (!vectype_out)
4120 return false;
4121 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4122 if (nunits_in != nunits_out / 2) /* FORNOW */
4123 return false;
4124
4125 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4126 gcc_assert (ncopies >= 1);
4127 /* FORNOW. This restriction should be relaxed. */
4128 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4129 {
4130 if (vect_print_dump_info (REPORT_DETAILS))
4131 fprintf (vect_dump, "multiple types in nested loop.");
4132 return false;
4133 }
4134
4135 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4136 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4137 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4138 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4139 && (code == NOP_EXPR || code == CONVERT_EXPR))))
4140 return false;
4141
4142 /* Check the operands of the operation. */
4143 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4144 {
4145 if (vect_print_dump_info (REPORT_DETAILS))
4146 fprintf (vect_dump, "use not simple.");
4147 return false;
4148 }
4149
4150 /* Supportable by target? */
4151 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
4152 return false;
4153
4154 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4155
4156 if (!vec_stmt) /* transformation not required. */
4157 {
4158 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4159 if (vect_print_dump_info (REPORT_DETAILS))
4160 fprintf (vect_dump, "=== vectorizable_demotion ===");
4161 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4162 return true;
4163 }
4164
4165 /** Transform. **/
4166 if (vect_print_dump_info (REPORT_DETAILS))
4167 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
4168 ncopies);
4169
4170 /* Handle def. */
4171 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4172
4173 /* In case the vectorization factor (VF) is bigger than the number
4174 of elements that we can fit in a vectype (nunits), we have to generate
4175 more than one vector stmt - i.e - we need to "unroll" the
4176 vector stmt by a factor VF/nunits. */
4177 prev_stmt_info = NULL;
4178 for (j = 0; j < ncopies; j++)
4179 {
4180 /* Handle uses. */
4181 if (j == 0)
4182 {
4183 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4184 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4185 }
4186 else
4187 {
4188 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
4189 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4190 }
4191
4192 /* Arguments are ready. Create the new vector stmt. */
4193 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
4194 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
4195 new_temp = make_ssa_name (vec_dest, new_stmt);
4196 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4197 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4198
4199 if (j == 0)
4200 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4201 else
4202 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4203
4204 prev_stmt_info = vinfo_for_stmt (new_stmt);
4205 }
4206
4207 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4208 return true;
4209 }
4210
4211
4212 /* Function vectorizable_type_promotion
4213
4214 Check if STMT performs a binary or unary operation that involves
4215 type promotion, and if it can be vectorized.
4216 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4217 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4218 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
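/* For illustration (a sketch, target dependent): promoting short to int with
   V8HI in and V4SI out produces two V4SI defs per input vector; the two codes
   (or builtin decls) returned by supportable_widening_operation (e.g.
   VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR, or widening-multiply variants for
   WIDEN_MULT_EXPR) each compute one half, via vect_gen_widened_results_half. */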
4219
4220 bool
4221 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
4222 tree *vec_stmt)
4223 {
4224 tree vec_dest;
4225 tree scalar_dest;
4226 tree operation;
4227 tree op0, op1 = NULL;
4228 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4229 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4230 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4232 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4233 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4234 int op_type;
4235 tree def, def_stmt;
4236 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4237 tree new_stmt;
4238 stmt_vec_info prev_stmt_info;
4239 int nunits_in;
4240 int nunits_out;
4241 tree vectype_out;
4242 int ncopies;
4243 int j;
4244 tree vectype_in;
4245
4246 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4247 return false;
4248
4249 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4250 return false;
4251
4252 /* FORNOW: not yet supported. */
4253 if (STMT_VINFO_LIVE_P (stmt_info))
4254 {
4255 if (vect_print_dump_info (REPORT_DETAILS))
4256 fprintf (vect_dump, "value used after loop.");
4257 return false;
4258 }
4259
4260 /* Is STMT a vectorizable type-promotion operation? */
4261 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4262 return false;
4263
4264 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4265 return false;
4266
4267 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4268 code = TREE_CODE (operation);
4269 if (code != NOP_EXPR && code != CONVERT_EXPR
4270 && code != WIDEN_MULT_EXPR)
4271 return false;
4272
4273 op0 = TREE_OPERAND (operation, 0);
4274 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4275 if (!vectype_in)
4276 return false;
4277 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4278
4279 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4280 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4281 if (!vectype_out)
4282 return false;
4283 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4284 if (nunits_out != nunits_in / 2) /* FORNOW */
4285 return false;
4286
4287 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4288 gcc_assert (ncopies >= 1);
4289 /* FORNOW. This restriction should be relaxed. */
4290 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4291 {
4292 if (vect_print_dump_info (REPORT_DETAILS))
4293 fprintf (vect_dump, "multiple types in nested loop.");
4294 return false;
4295 }
4296
4297 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4298 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4299 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4300 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4301 && (code == CONVERT_EXPR || code == NOP_EXPR))))
4302 return false;
4303
4304 /* Check the operands of the operation. */
4305 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4306 {
4307 if (vect_print_dump_info (REPORT_DETAILS))
4308 fprintf (vect_dump, "use not simple.");
4309 return false;
4310 }
4311
4312 op_type = TREE_CODE_LENGTH (code);
4313 if (op_type == binary_op)
4314 {
4315 op1 = TREE_OPERAND (operation, 1);
4316 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4317 {
4318 if (vect_print_dump_info (REPORT_DETAILS))
4319 fprintf (vect_dump, "use not simple.");
4320 return false;
4321 }
4322 }
4323
4324 /* Supportable by target? */
4325 if (!supportable_widening_operation (code, stmt, vectype_in,
4326 &decl1, &decl2, &code1, &code2))
4327 return false;
4328
4329 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4330
4331 if (!vec_stmt) /* transformation not required. */
4332 {
4333 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4334 if (vect_print_dump_info (REPORT_DETAILS))
4335 fprintf (vect_dump, "=== vectorizable_promotion ===");
4336 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
4337 return true;
4338 }
4339
4340 /** Transform. **/
4341
4342 if (vect_print_dump_info (REPORT_DETAILS))
4343 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4344 ncopies);
4345
4346 /* Handle def. */
4347 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4348
4349 /* In case the vectorization factor (VF) is bigger than the number
4350 of elements that we can fit in a vectype (nunits), we have to generate
4351 more than one vector stmt - i.e - we need to "unroll" the
4352 vector stmt by a factor VF/nunits. */
4353
4354 prev_stmt_info = NULL;
4355 for (j = 0; j < ncopies; j++)
4356 {
4357 /* Handle uses. */
4358 if (j == 0)
4359 {
4360 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4361 if (op_type == binary_op)
4362 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4363 }
4364 else
4365 {
4366 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4367 if (op_type == binary_op)
4368 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
4369 }
4370
4371 /* Arguments are ready. Create the new vector stmt. We are creating
4372 two vector defs because the widened result does not fit in one vector.
4373 The vectorized stmt can be expressed as a call to a target builtin,
4374 or by using a tree-code. */
4375 /* Generate first half of the widened result: */
4376 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
4377 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4378 if (j == 0)
4379 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4380 else
4381 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4382 prev_stmt_info = vinfo_for_stmt (new_stmt);
4383
4384 /* Generate second half of the widened result: */
4385 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
4386 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4387 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4388 prev_stmt_info = vinfo_for_stmt (new_stmt);
4389
4390 }
4391
4392 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4393 return true;
4394 }
4395
4396
4397 /* Function vect_strided_store_supported.
4398
4399 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4400 and FALSE otherwise. */
4401
4402 static bool
4403 vect_strided_store_supported (tree vectype)
4404 {
4405 optab interleave_high_optab, interleave_low_optab;
4406 int mode;
4407
4408 mode = (int) TYPE_MODE (vectype);
4409
4410 /* Check that the operation is supported. */
4411 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4412 vectype);
4413 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
4414 vectype);
4415 if (!interleave_high_optab || !interleave_low_optab)
4416 {
4417 if (vect_print_dump_info (REPORT_DETAILS))
4418 fprintf (vect_dump, "no optab for interleave.");
4419 return false;
4420 }
4421
4422 if (optab_handler (interleave_high_optab, mode)->insn_code
4423 == CODE_FOR_nothing
4424 || optab_handler (interleave_low_optab, mode)->insn_code
4425 == CODE_FOR_nothing)
4426 {
4427 if (vect_print_dump_info (REPORT_DETAILS))
4428 fprintf (vect_dump, "interleave op not supported by target.");
4429 return false;
4430 }
4431
4432 return true;
4433 }
4434
4435
4436 /* Function vect_permute_store_chain.
4437
4438 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4439 a power of 2, generate interleave_high/low stmts to reorder the data
4440 correctly for the stores. Return the final references for stores in
4441 RESULT_CHAIN.
4442
4443 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4444 The input is 4 vectors each containing 8 elements. We assign a number to each
4445 element; the input sequence is:
4446
4447 1st vec: 0 1 2 3 4 5 6 7
4448 2nd vec: 8 9 10 11 12 13 14 15
4449 3rd vec: 16 17 18 19 20 21 22 23
4450 4th vec: 24 25 26 27 28 29 30 31
4451
4452 The output sequence should be:
4453
4454 1st vec: 0 8 16 24 1 9 17 25
4455 2nd vec: 2 10 18 26 3 11 19 27
4456 3rd vec: 4 12 20 28 5 13 21 29
4457 4th vec: 6 14 22 30 7 15 23 31
4458
4459 i.e., we interleave the contents of the four vectors in their order.
4460
4461 We use interleave_high/low instructions to create such output. The input of
4462 each interleave_high/low operation is two vectors:
4463 1st vec 2nd vec
4464 0 1 2 3 4 5 6 7
4465 the even elements of the result vector are obtained left-to-right from the
4466 high/low elements of the first vector. The odd elements of the result are
4467 obtained left-to-right from the high/low elements of the second vector.
4468 The output of interleave_high will be: 0 4 1 5
4469 and of interleave_low: 2 6 3 7
4470
4471
4472 The permutation is done in log LENGTH stages. In each stage interleave_high
4473 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4474 where the first argument is taken from the first half of DR_CHAIN and the
4475 second argument from its second half.
4476 In our example,
4477
4478 I1: interleave_high (1st vec, 3rd vec)
4479 I2: interleave_low (1st vec, 3rd vec)
4480 I3: interleave_high (2nd vec, 4th vec)
4481 I4: interleave_low (2nd vec, 4th vec)
4482
4483 The output for the first stage is:
4484
4485 I1: 0 16 1 17 2 18 3 19
4486 I2: 4 20 5 21 6 22 7 23
4487 I3: 8 24 9 25 10 26 11 27
4488 I4: 12 28 13 29 14 30 15 31
4489
4490 The output of the second stage, i.e. the final result is:
4491
4492 I1: 0 8 16 24 1 9 17 25
4493 I2: 2 10 18 26 3 11 19 27
4494 I3: 4 12 20 28 5 13 21 29
4495 I4: 6 14 22 30 7 15 23 31. */
4496
4497 static bool
4498 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4499 unsigned int length,
4500 tree stmt,
4501 block_stmt_iterator *bsi,
4502 VEC(tree,heap) **result_chain)
4503 {
4504 tree perm_dest, perm_stmt, vect1, vect2, high, low;
4505 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4506 tree scalar_dest, tmp;
4507 int i;
4508 unsigned int j;
4509 VEC(tree,heap) *first, *second;
4510
4511 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4512 first = VEC_alloc (tree, heap, length/2);
4513 second = VEC_alloc (tree, heap, length/2);
4514
4515 /* Check that the operation is supported. */
4516 if (!vect_strided_store_supported (vectype))
4517 return false;
4518
4519 *result_chain = VEC_copy (tree, heap, dr_chain);
4520
4521 for (i = 0; i < exact_log2 (length); i++)
4522 {
4523 for (j = 0; j < length/2; j++)
4524 {
4525 vect1 = VEC_index (tree, dr_chain, j);
4526 vect2 = VEC_index (tree, dr_chain, j+length/2);
4527
4528 /* Create interleaving stmt:
4529 in the case of big endian:
4530 high = interleave_high (vect1, vect2)
4531 and in the case of little endian:
4532 high = interleave_low (vect1, vect2). */
4533 perm_dest = create_tmp_var (vectype, "vect_inter_high");
4534 DECL_GIMPLE_REG_P (perm_dest) = 1;
4535 add_referenced_var (perm_dest);
4536 if (BYTES_BIG_ENDIAN)
4537 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4538 else
4539 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4540 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4541 high = make_ssa_name (perm_dest, perm_stmt);
4542 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
4543 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4544 VEC_replace (tree, *result_chain, 2*j, high);
4545
4546 /* Create interleaving stmt:
4547 in the case of big endian:
4548 low = interleave_low (vect1, vect2)
4549 and in the case of little endian:
4550 low = interleave_high (vect1, vect2). */
4551 perm_dest = create_tmp_var (vectype, "vect_inter_low");
4552 DECL_GIMPLE_REG_P (perm_dest) = 1;
4553 add_referenced_var (perm_dest);
4554 if (BYTES_BIG_ENDIAN)
4555 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4556 else
4557 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4558 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4559 low = make_ssa_name (perm_dest, perm_stmt);
4560 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
4561 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4562 VEC_replace (tree, *result_chain, 2*j+1, low);
4563 }
4564 dr_chain = VEC_copy (tree, heap, *result_chain);
4565 }
4566 return true;
4567 }
4568
4569
4570 /* Function vectorizable_store.
4571
4572 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4573 can be vectorized.
4574 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4575 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4576 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4577
4578 bool
4579 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
4580 slp_tree slp_node)
4581 {
4582 tree scalar_dest;
4583 tree data_ref;
4584 tree op;
4585 tree vec_oprnd = NULL_TREE;
4586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4587 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4588 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4589 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4590 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4591 enum machine_mode vec_mode;
4592 tree dummy;
4593 enum dr_alignment_support alignment_support_scheme;
4594 tree def, def_stmt;
4595 enum vect_def_type dt;
4596 stmt_vec_info prev_stmt_info = NULL;
4597 tree dataref_ptr = NULL_TREE;
4598 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4599 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4600 int j;
4601 tree next_stmt, first_stmt;
4602 bool strided_store = false;
4603 unsigned int group_size, i;
4604 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
4605 bool inv_p;
4606 VEC(tree,heap) *vec_oprnds = NULL;
4607 bool slp = (slp_node != NULL);
4608 stmt_vec_info first_stmt_vinfo;
4609 unsigned int vec_num;
4610
4611 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
4612 this, so we can safely override NCOPIES with 1 here. */
4613 if (slp)
4614 ncopies = 1;
4615
4616 gcc_assert (ncopies >= 1);
4617
4618 /* FORNOW. This restriction should be relaxed. */
4619 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4620 {
4621 if (vect_print_dump_info (REPORT_DETAILS))
4622 fprintf (vect_dump, "multiple types in nested loop.");
4623 return false;
4624 }
4625
4626 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4627 return false;
4628
4629 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4630 return false;
4631
4632 if (STMT_VINFO_LIVE_P (stmt_info))
4633 {
4634 if (vect_print_dump_info (REPORT_DETAILS))
4635 fprintf (vect_dump, "value used after loop.");
4636 return false;
4637 }
4638
4639 /* Is vectorizable store? */
4640
4641 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4642 return false;
4643
4644 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4645 if (TREE_CODE (scalar_dest) != ARRAY_REF
4646 && TREE_CODE (scalar_dest) != INDIRECT_REF
4647 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
4648 return false;
4649
4650 op = GIMPLE_STMT_OPERAND (stmt, 1);
4651 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4652 {
4653 if (vect_print_dump_info (REPORT_DETAILS))
4654 fprintf (vect_dump, "use not simple.");
4655 return false;
4656 }
4657
4658 vec_mode = TYPE_MODE (vectype);
4659 /* FORNOW. In some cases can vectorize even if data-type not supported
4660 (e.g. - array initialization with 0). */
4661 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
4662 return false;
4663
4664 if (!STMT_VINFO_DATA_REF (stmt_info))
4665 return false;
4666
4667 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4668 {
4669 strided_store = true;
4670 if (!vect_strided_store_supported (vectype)
4671 && !PURE_SLP_STMT (stmt_info) && !slp)
4672 return false;
4673 }
4674
4675 if (!vec_stmt) /* transformation not required. */
4676 {
4677 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4678 if (!PURE_SLP_STMT (stmt_info))
4679 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
4680 return true;
4681 }
4682
4683 /** Transform. **/
4684
4685 if (strided_store)
4686 {
4687 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4688 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4689 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4690
4691 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4692
4693 /* FORNOW */
4694 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
4695
4696 /* We vectorize all the stmts of the interleaving group when we
4697 reach the last stmt in the group. */
4698 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4699 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
4700 && !slp)
4701 {
4702 *vec_stmt = NULL_TREE;
4703 return true;
4704 }
4705
4706 if (slp)
4707 strided_store = false;
4708
4709 /* VEC_NUM is the number of vect stmts to be created for this group. */
4710 if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
4711 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4712 else
4713 vec_num = group_size;
4714 }
4715 else
4716 {
4717 first_stmt = stmt;
4718 first_dr = dr;
4719 group_size = vec_num = 1;
4720 first_stmt_vinfo = stmt_info;
4721 }
4722
4723 if (vect_print_dump_info (REPORT_DETAILS))
4724 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
4725
4726 dr_chain = VEC_alloc (tree, heap, group_size);
4727 oprnds = VEC_alloc (tree, heap, group_size);
4728
4729 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4730 gcc_assert (alignment_support_scheme);
4731 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
4732
4733 /* In case the vectorization factor (VF) is bigger than the number
4734 of elements that we can fit in a vectype (nunits), we have to generate
4735 more than one vector stmt - i.e - we need to "unroll" the
4736 vector stmt by a factor VF/nunits. For more details see documentation in
4737 vect_get_vec_def_for_copy_stmt. */
4738
4739 /* In case of interleaving (non-unit strided access):
4740
4741 S1: &base + 2 = x2
4742 S2: &base = x0
4743 S3: &base + 1 = x1
4744 S4: &base + 3 = x3
4745
4746 We create vectorized stores starting from the base address (the access of the
4747 first stmt in the chain (S2 in the above example)), when the last store stmt
4748 of the chain (S4) is reached:
4749
4750 VS1: &base = vx2
4751 VS2: &base + vec_size*1 = vx0
4752 VS3: &base + vec_size*2 = vx1
4753 VS4: &base + vec_size*3 = vx3
4754
4755 Then permutation statements are generated:
4756
4757 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4758 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4759 ...
4760
4761 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4762 (the order of the data-refs in the output of vect_permute_store_chain
4763 corresponds to the order of scalar stmts in the interleaving chain - see
4764 the documentation of vect_permute_store_chain()).
4765
4766 In case of both multiple types and interleaving, the above vector stores and
4767 permutation stmts are created for every copy. The result vector stmts are
4768 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4769 STMT_VINFO_RELATED_STMT for the next copies.
4770 */
4771
4772 prev_stmt_info = NULL;
4773 for (j = 0; j < ncopies; j++)
4774 {
4775 tree new_stmt;
4776 tree ptr_incr;
4777
4778 if (j == 0)
4779 {
4780 if (slp)
4781 {
4782 /* Get vectorized arguments for SLP_NODE. */
4783 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
4784
4785 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4786 }
4787 else
4788 {
4789 /* For interleaved stores we collect vectorized defs for all the
4790 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4791 used as an input to vect_permute_store_chain(), and OPRNDS as
4792 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4793
4794 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4795 OPRNDS are of size 1. */
4796 next_stmt = first_stmt;
4797 for (i = 0; i < group_size; i++)
4798 {
4799 /* Since gaps are not supported for interleaved stores,
4800 GROUP_SIZE is the exact number of stmts in the chain.
4801 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4802 there is no interleaving, GROUP_SIZE is 1, and only one
4803 iteration of the loop will be executed. */
4804 gcc_assert (next_stmt);
4805 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4806
4807 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4808 NULL);
4809 VEC_quick_push(tree, dr_chain, vec_oprnd);
4810 VEC_quick_push(tree, oprnds, vec_oprnd);
4811 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4812 }
4813 }
4814 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
4815 &dummy, &ptr_incr, false,
4816 TREE_TYPE (vec_oprnd), &inv_p);
4817 gcc_assert (!inv_p);
4818 }
4819 else
4820 {
4821 /* FORNOW SLP doesn't work for multiple types. */
4822 gcc_assert (!slp);
4823
4824 /* For interleaved stores we created vectorized defs for all the
4825 defs stored in OPRNDS in the previous iteration (previous copy).
4826 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4827 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4828 next copy.
4829 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4830 OPRNDS are of size 1. */
4831 for (i = 0; i < group_size; i++)
4832 {
4833 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4834 VEC_index (tree, oprnds, i));
4835 VEC_replace(tree, dr_chain, i, vec_oprnd);
4836 VEC_replace(tree, oprnds, i, vec_oprnd);
4837 }
4838 dataref_ptr =
4839 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
4840 }
4841
4842 if (strided_store)
4843 {
4844 result_chain = VEC_alloc (tree, heap, group_size);
4845 /* Permute. */
4846 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4847 &result_chain))
4848 return false;
4849 }
4850
4851 next_stmt = first_stmt;
4852 for (i = 0; i < vec_num; i++)
4853 {
4854 if (i > 0)
4855 /* Bump the vector pointer. */
4856 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
4857 NULL_TREE);
4858
4859 if (slp)
4860 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4861 else if (strided_store)
4862 /* For strided stores vectorized defs are interleaved in
4863 vect_permute_store_chain(). */
4864 vec_oprnd = VEC_index (tree, result_chain, i);
4865
4866 data_ref = build_fold_indirect_ref (dataref_ptr);
4867 /* Arguments are ready. Create the new vector stmt. */
4868 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4869 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4870 mark_symbols_for_renaming (new_stmt);
4871
4872 if (j == 0)
4873 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4874 else
4875 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4876
4877 prev_stmt_info = vinfo_for_stmt (new_stmt);
4878 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4879 if (!next_stmt)
4880 break;
4881 }
4882 }
4883
4884 return true;
4885 }
4886
4887
4888 /* Function vect_setup_realignment
4889
4890 This function is called when vectorizing an unaligned load using
4891 the dr_explicit_realign[_optimized] scheme.
4892 This function generates the following code at the loop prolog:
4893
4894 p = initial_addr;
4895 x msq_init = *(floor(p)); # prolog load
4896 realignment_token = call target_builtin;
4897 loop:
4898 x msq = phi (msq_init, ---)
4899
4900 The stmts marked with x are generated only for the case of
4901 dr_explicit_realign_optimized.
4902
4903 The code above sets up a new (vector) pointer, pointing to the first
4904 location accessed by STMT, and a "floor-aligned" load using that pointer.
4905 It also generates code to compute the "realignment-token" (if the relevant
4906 target hook was defined), and creates a phi-node at the loop-header bb
4907 whose arguments are the result of the prolog-load (created by this
4908 function) and the result of a load that takes place in the loop (to be
4909 created by the caller to this function).
4910
4911 For the case of dr_explicit_realign_optimized:
4912 The caller to this function uses the phi-result (msq) to create the
4913 realignment code inside the loop, and sets up the missing phi argument,
4914 as follows:
4915 loop:
4916 msq = phi (msq_init, lsq)
4917 lsq = *(floor(p')); # load in loop
4918 result = realign_load (msq, lsq, realignment_token);
4919
4920 For the case of dr_explicit_realign:
4921 loop:
4922 msq = *(floor(p)); # load in loop
4923 p' = p + (VS-1);
4924 lsq = *(floor(p')); # load in loop
4925 result = realign_load (msq, lsq, realignment_token);
4926
4927 Input:
4928 STMT - (scalar) load stmt to be vectorized. This load accesses
4929 a memory location that may be unaligned.
4930 BSI - place where new code is to be inserted.
4931 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4932 is used.
4933
4934 Output:
4935 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4936 target hook, if defined.
4937 Return value - the result of the loop-header phi node. */
4938
4939 static tree
4940 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4941 tree *realignment_token,
4942 enum dr_alignment_support alignment_support_scheme,
4943 tree init_addr,
4944 struct loop **at_loop)
4945 {
4946 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4948 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4949 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4950 edge pe;
4951 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4952 tree vec_dest;
4953 tree inc;
4954 tree ptr;
4955 tree data_ref;
4956 tree new_stmt;
4957 basic_block new_bb;
4958 tree msq_init = NULL_TREE;
4959 tree new_temp;
4960 tree phi_stmt;
4961 tree msq = NULL_TREE;
4962 tree stmts = NULL_TREE;
4963 bool inv_p;
4964 bool compute_in_loop = false;
4965 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4966 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
4967 struct loop *loop_for_initial_load;
4968
4969 gcc_assert (alignment_support_scheme == dr_explicit_realign
4970 || alignment_support_scheme == dr_explicit_realign_optimized);
4971
4972 /* We need to generate three things:
4973 1. the misalignment computation
4974 2. the extra vector load (for the optimized realignment scheme).
4975 3. the phi node for the two vectors from which the realignment is
4976 done (for the optimized realignment scheme).
4977 */
4978
4979 /* 1. Determine where to generate the misalignment computation.
4980
4981 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4982 calculation will be generated by this function, outside the loop (in the
4983 preheader). Otherwise, INIT_ADDR has already been computed for us by the
4984 caller, inside the loop.
4985
4986 Background: If the misalignment remains fixed throughout the iterations of
4987 the loop, then both realignment schemes are applicable, and also the
4988 misalignment computation can be done outside LOOP. This is because we are
4989 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4990 are a multiple of VS (the Vector Size), and therefore the misalignment in
4991 different vectorized LOOP iterations is always the same.
4992 The problem arises only if the memory access is in an inner-loop nested
4993 inside LOOP, which is now being vectorized using outer-loop vectorization.
4994 This is the only case when the misalignment of the memory access may not
4995 remain fixed throughout the iterations of the inner-loop (as explained in
4996 detail in vect_supportable_dr_alignment). In this case, not only is the
4997 optimized realignment scheme not applicable, but also the misalignment
4998 computation (and generation of the realignment token that is passed to
4999 REALIGN_LOAD) has to be done inside the loop.
5000
5001 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5002 or not, which in turn determines if the misalignment is computed inside
5003 the inner-loop, or outside LOOP. */
5004
5005 if (init_addr != NULL_TREE)
5006 {
5007 compute_in_loop = true;
5008 gcc_assert (alignment_support_scheme == dr_explicit_realign);
5009 }
5010
5011
5012 /* 2. Determine where to generate the extra vector load.
5013
5014 For the optimized realignment scheme, instead of generating two vector
5015 loads in each iteration, we generate a single extra vector load in the
5016 preheader of the loop, and in each iteration reuse the result of the
5017 vector load from the previous iteration. In case the memory access is in
5018 an inner-loop nested inside LOOP, which is now being vectorized using
5019 outer-loop vectorization, we need to determine whether this initial vector
5020 load should be generated at the preheader of the inner-loop, or can be
5021 generated at the preheader of LOOP. If the memory access has no evolution
5022 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5023 to be generated inside LOOP (in the preheader of the inner-loop). */
5024
5025 if (nested_in_vect_loop)
5026 {
5027 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5028 bool invariant_in_outerloop =
5029 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5030 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5031 }
5032 else
5033 loop_for_initial_load = loop;
5034 if (at_loop)
5035 *at_loop = loop_for_initial_load;
5036
5037 /* 3. For the case of the optimized realignment, create the first vector
5038 load at the loop preheader. */
5039
5040 if (alignment_support_scheme == dr_explicit_realign_optimized)
5041 {
5042 /* Create msq_init = *(floor(p1)) in the loop preheader */
5043
5044 gcc_assert (!compute_in_loop);
5045 pe = loop_preheader_edge (loop_for_initial_load);
5046 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5047 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5048 &init_addr, &inc, true, NULL_TREE, &inv_p);
5049 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5050 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5051 new_temp = make_ssa_name (vec_dest, new_stmt);
5052 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5053 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5054 gcc_assert (!new_bb);
5055 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
5056 }
5057
5058 /* 4. Create realignment token using a target builtin, if available.
5059 It is done either inside the containing loop, or before LOOP (as
5060 determined above). */
5061
5062 if (targetm.vectorize.builtin_mask_for_load)
5063 {
5064 tree builtin_decl;
5065
5066 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5067 if (compute_in_loop)
5068 gcc_assert (init_addr); /* already computed by the caller. */
5069 else
5070 {
5071 /* Generate the INIT_ADDR computation outside LOOP. */
5072 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5073 NULL_TREE, loop);
5074 pe = loop_preheader_edge (loop);
5075 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
5076 gcc_assert (!new_bb);
5077 }
5078
5079 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5080 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
5081 vec_dest = vect_create_destination_var (scalar_dest,
5082 TREE_TYPE (new_stmt));
5083 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5084 new_temp = make_ssa_name (vec_dest, new_stmt);
5085 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5086
5087 if (compute_in_loop)
5088 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
5089 else
5090 {
5091 /* Generate the misalignment computation outside LOOP. */
5092 pe = loop_preheader_edge (loop);
5093 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5094 gcc_assert (!new_bb);
5095 }
5096
5097 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
5098
5099 /* The result of the CALL_EXPR to this builtin is determined from
5100 the value of the parameter and no global variables are touched
5101 which makes the builtin a "const" function. Requiring the
5102 builtin to have the "const" attribute makes it unnecessary
5103 to call mark_call_clobbered. */
5104 gcc_assert (TREE_READONLY (builtin_decl));
5105 }
5106
5107 if (alignment_support_scheme == dr_explicit_realign)
5108 return msq;
5109
5110 gcc_assert (!compute_in_loop);
5111 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5112
5113
5114 /* 5. Create msq = phi <msq_init, lsq> in loop */
5115
5116 pe = loop_preheader_edge (containing_loop);
5117 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5118 msq = make_ssa_name (vec_dest, NULL_TREE);
5119 phi_stmt = create_phi_node (msq, containing_loop->header);
5120 SSA_NAME_DEF_STMT (msq) = phi_stmt;
5121 add_phi_arg (phi_stmt, msq_init, pe);
5122
5123 return msq;
5124 }
5125
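/* Illustrative sketch (not part of the vectorizer): a plain C model of the
   realign_load combination set up above.  MSQ and LSQ are the two aligned
   vector loads, and SHIFT plays the role of the realignment token (the
   misalignment of the original access, in elements, 0 <= SHIFT < VS).
   Element ordering and all names here are hypothetical assumptions, not
   target semantics.  */
#if 0
enum { VS = 4 };        /* assumed number of elements per vector */

static void
model_realign_load (const int msq[VS], const int lsq[VS], int shift,
                    int out[VS])
{
  int i;

  /* The unaligned value is the last VS - SHIFT elements of MSQ followed
     by the first SHIFT elements of LSQ.  */
  for (i = 0; i < VS; i++)
    out[i] = (shift + i < VS) ? msq[shift + i] : lsq[shift + i - VS];
}
#endif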
5126
5127 /* Function vect_strided_load_supported.
5128
5129 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5130 and FALSE otherwise. */
5131
5132 static bool
5133 vect_strided_load_supported (tree vectype)
5134 {
5135 optab perm_even_optab, perm_odd_optab;
5136 int mode;
5137
5138 mode = (int) TYPE_MODE (vectype);
5139
5140 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
5141 if (!perm_even_optab)
5142 {
5143 if (vect_print_dump_info (REPORT_DETAILS))
5144 fprintf (vect_dump, "no optab for perm_even.");
5145 return false;
5146 }
5147
5148 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
5149 {
5150 if (vect_print_dump_info (REPORT_DETAILS))
5151 fprintf (vect_dump, "perm_even op not supported by target.");
5152 return false;
5153 }
5154
5155 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
5156 if (!perm_odd_optab)
5157 {
5158 if (vect_print_dump_info (REPORT_DETAILS))
5159 fprintf (vect_dump, "no optab for perm_odd.");
5160 return false;
5161 }
5162
5163 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
5164 {
5165 if (vect_print_dump_info (REPORT_DETAILS))
5166 fprintf (vect_dump, "perm_odd op not supported by target.");
5167 return false;
5168 }
5169 return true;
5170 }
5171
5172
5173 /* Function vect_permute_load_chain.
5174
5175 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5176 a power of 2, generate extract_even/odd stmts to reorder the input data
5177 correctly. Return the final references for loads in RESULT_CHAIN.
5178
5179 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5180 The input is 4 vectors each containing 8 elements. We assign a number to each
5181 element, the input sequence is:
5182
5183 1st vec: 0 1 2 3 4 5 6 7
5184 2nd vec: 8 9 10 11 12 13 14 15
5185 3rd vec: 16 17 18 19 20 21 22 23
5186 4th vec: 24 25 26 27 28 29 30 31
5187
5188 The output sequence should be:
5189
5190 1st vec: 0 4 8 12 16 20 24 28
5191 2nd vec: 1 5 9 13 17 21 25 29
5192 3rd vec: 2 6 10 14 18 22 26 30
5193 4th vec: 3 7 11 15 19 23 27 31
5194
5195 i.e., the first output vector should contain the first elements of each
5196 interleaving group, etc.
5197
5198 We use extract_even/odd instructions to create such output. The input of each
5199 extract_even/odd operation is two vectors
5200 1st vec 2nd vec
5201 0 1 2 3 4 5 6 7
5202
5203 and the output is the vector of extracted even/odd elements. The output of
5204 extract_even will be: 0 2 4 6
5205 and of extract_odd: 1 3 5 7
5206
5207
5208 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
5209 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5210 order. In our example,
5211
5212 E1: extract_even (1st vec, 2nd vec)
5213 E2: extract_odd (1st vec, 2nd vec)
5214 E3: extract_even (3rd vec, 4th vec)
5215 E4: extract_odd (3rd vec, 4th vec)
5216
5217 The output for the first stage will be:
5218
5219 E1: 0 2 4 6 8 10 12 14
5220 E2: 1 3 5 7 9 11 13 15
5221 E3: 16 18 20 22 24 26 28 30
5222 E4: 17 19 21 23 25 27 29 31
5223
5224 In order to proceed and create the correct sequence for the next stage (or
5225 for the correct output, if the second stage is the last one, as in our
5226 example), we first put the output of the extract_even operation and then the
5227 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5228 The input for the second stage is:
5229
5230 1st vec (E1): 0 2 4 6 8 10 12 14
5231 2nd vec (E3): 16 18 20 22 24 26 28 30
5232 3rd vec (E2): 1 3 5 7 9 11 13 15
5233 4th vec (E4): 17 19 21 23 25 27 29 31
5234
5235 The output of the second stage:
5236
5237 E1: 0 4 8 12 16 20 24 28
5238 E2: 2 6 10 14 18 22 26 30
5239 E3: 1 5 9 13 17 21 25 29
5240 E4: 3 7 11 15 19 23 27 31
5241
5242 And RESULT_CHAIN after reordering:
5243
5244 1st vec (E1): 0 4 8 12 16 20 24 28
5245 2nd vec (E3): 1 5 9 13 17 21 25 29
5246 3rd vec (E2): 2 6 10 14 18 22 26 30
5247 4th vec (E4): 3 7 11 15 19 23 27 31. */
5248
5249 static bool
5250 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5251 unsigned int length,
5252 tree stmt,
5253 block_stmt_iterator *bsi,
5254 VEC(tree,heap) **result_chain)
5255 {
5256 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
5257 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5258 tree tmp;
5259 int i;
5260 unsigned int j;
5261
5262 /* Check that the operation is supported. */
5263 if (!vect_strided_load_supported (vectype))
5264 return false;
5265
5266 *result_chain = VEC_copy (tree, heap, dr_chain);
5267 for (i = 0; i < exact_log2 (length); i++)
5268 {
5269 for (j = 0; j < length; j += 2)
5270 {
5271 first_vect = VEC_index (tree, dr_chain, j);
5272 second_vect = VEC_index (tree, dr_chain, j+1);
5273
5274 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5275 perm_dest = create_tmp_var (vectype, "vect_perm_even");
5276 DECL_GIMPLE_REG_P (perm_dest) = 1;
5277 add_referenced_var (perm_dest);
5278
5279 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
5280 first_vect, second_vect);
5281 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5282
5283 data_ref = make_ssa_name (perm_dest, perm_stmt);
5284 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5285 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5286 mark_symbols_for_renaming (perm_stmt);
5287
5288 VEC_replace (tree, *result_chain, j/2, data_ref);
5289
5290 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5291 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
5292 DECL_GIMPLE_REG_P (perm_dest) = 1;
5293 add_referenced_var (perm_dest);
5294
5295 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
5296 first_vect, second_vect);
5297 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5298 data_ref = make_ssa_name (perm_dest, perm_stmt);
5299 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5300 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5301 mark_symbols_for_renaming (perm_stmt);
5302
5303 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
5304 }
5305 dr_chain = VEC_copy (tree, heap, *result_chain);
5306 }
5307 return true;
5308 }
5309
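/* Illustrative sketch (not part of the vectorizer): a plain C model of one
   extract_even/extract_odd stage of the permutation implemented above.  IN
   holds LEN vectors of NELT elements each; the even/odd extractions of each
   pair are written to OUT at the positions vect_permute_load_chain uses
   (j/2 and j/2 + len/2).  Repeating the stage log2 (LEN) times, copying OUT
   back to IN in between, yields the de-interleaved order of the example.
   MAX_NELT and all names are hypothetical.  */
#if 0
enum { MAX_NELT = 8 };

static void
model_extract_even_odd_stage (int len /* power of two */, int nelt,
                              int in[][MAX_NELT], int out[][MAX_NELT])
{
  int j, k;

  for (j = 0; j < len; j += 2)
    for (k = 0; k < nelt; k++)
      {
        /* Indices into the concatenation of in[j] and in[j+1].  */
        int even = 2 * k, odd = 2 * k + 1;

        out[j / 2][k] = even < nelt ? in[j][even] : in[j + 1][even - nelt];
        out[j / 2 + len / 2][k] = odd < nelt ? in[j][odd]
                                             : in[j + 1][odd - nelt];
      }
}
#endif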
5310
5311 /* Function vect_transform_strided_load.
5312
5313 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5314 to perform their permutation and ascribe the resulting vectorized statements to
5315 the scalar statements.
5316 */
5317
5318 static bool
5319 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
5320 block_stmt_iterator *bsi)
5321 {
5322 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5323 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5324 tree next_stmt, new_stmt;
5325 VEC(tree,heap) *result_chain = NULL;
5326 unsigned int i, gap_count;
5327 tree tmp_data_ref;
5328
5329 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5330 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5331 vectors, that are ready for vector computation. */
5332 result_chain = VEC_alloc (tree, heap, size);
5333 /* Permute. */
5334 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
5335 return false;
5336
5337 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5338 Since we scan the chain starting from its first node, their order
5339 corresponds to the order of data-refs in RESULT_CHAIN. */
5340 next_stmt = first_stmt;
5341 gap_count = 1;
5342 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
5343 {
5344 if (!next_stmt)
5345 break;
5346
5347 /* Skip the gaps. Loads created for the gaps will be removed by the dead
5348 code elimination pass later.
5349 DR_GROUP_GAP is the number of steps in elements from the previous
5350 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5351 correspond to the gaps.
5352 */
5353 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5354 {
5355 gap_count++;
5356 continue;
5357 }
5358
5359 while (next_stmt)
5360 {
5361 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5362 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5363 copies, and we put the new vector statement in the first available
5364 RELATED_STMT. */
5365 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5366 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5367 else
5368 {
5369 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5370 tree rel_stmt = STMT_VINFO_RELATED_STMT (
5371 vinfo_for_stmt (prev_stmt));
5372 while (rel_stmt)
5373 {
5374 prev_stmt = rel_stmt;
5375 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5376 }
5377 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5378 }
5379 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5380 gap_count = 1;
5381 /* If NEXT_STMT accesses the same DR as the previous statement,
5382 put the same TMP_DATA_REF as its vectorized statement; otherwise
5383 get the next data-ref from RESULT_CHAIN. */
5384 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5385 break;
5386 }
5387 }
5388 return true;
5389 }
5390
5391
5392 /* vectorizable_load.
5393
5394 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5395 can be vectorized.
5396 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5397 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5398 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5399
5400 bool
5401 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
5402 slp_tree slp_node)
5403 {
5404 tree scalar_dest;
5405 tree vec_dest = NULL;
5406 tree data_ref = NULL;
5407 tree op;
5408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5409 stmt_vec_info prev_stmt_info;
5410 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5411 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5412 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5413 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5414 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
5415 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5416 tree new_temp;
5417 int mode;
5418 tree new_stmt = NULL_TREE;
5419 tree dummy;
5420 enum dr_alignment_support alignment_support_scheme;
5421 tree dataref_ptr = NULL_TREE;
5422 tree ptr_incr;
5423 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5424 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5425 int i, j, group_size;
5426 tree msq = NULL_TREE, lsq;
5427 tree offset = NULL_TREE;
5428 tree realignment_token = NULL_TREE;
5429 tree phi = NULL_TREE;
5430 VEC(tree,heap) *dr_chain = NULL;
5431 bool strided_load = false;
5432 tree first_stmt;
5433 tree scalar_type;
5434 bool inv_p;
5435 bool compute_in_loop = false;
5436 struct loop *at_loop;
5437 int vec_num;
5438 bool slp = (slp_node != NULL);
5439
5440 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
5441 this, so we can safely override NCOPIES with 1 here. */
5442 if (slp)
5443 ncopies = 1;
5444
5445 gcc_assert (ncopies >= 1);
5446
5447 /* FORNOW. This restriction should be relaxed. */
5448 if (nested_in_vect_loop && ncopies > 1)
5449 {
5450 if (vect_print_dump_info (REPORT_DETAILS))
5451 fprintf (vect_dump, "multiple types in nested loop.");
5452 return false;
5453 }
5454
5455 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5456 return false;
5457
5458 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5459 return false;
5460
5461 /* FORNOW: not yet supported. */
5462 if (STMT_VINFO_LIVE_P (stmt_info))
5463 {
5464 if (vect_print_dump_info (REPORT_DETAILS))
5465 fprintf (vect_dump, "value used after loop.");
5466 return false;
5467 }
5468
5469 /* Is vectorizable load? */
5470 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5471 return false;
5472
5473 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5474 if (TREE_CODE (scalar_dest) != SSA_NAME)
5475 return false;
5476
5477 op = GIMPLE_STMT_OPERAND (stmt, 1);
5478 if (TREE_CODE (op) != ARRAY_REF
5479 && TREE_CODE (op) != INDIRECT_REF
5480 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
5481 return false;
5482
5483 if (!STMT_VINFO_DATA_REF (stmt_info))
5484 return false;
5485
5486 scalar_type = TREE_TYPE (DR_REF (dr));
5487 mode = (int) TYPE_MODE (vectype);
5488
5489 /* FORNOW. In some cases we can vectorize even if the data type is not
5490 supported (e.g., data copies). */
5491 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
5492 {
5493 if (vect_print_dump_info (REPORT_DETAILS))
5494 fprintf (vect_dump, "Aligned load, but unsupported type.");
5495 return false;
5496 }
5497
5498 /* Check if the load is a part of an interleaving chain. */
5499 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5500 {
5501 strided_load = true;
5502 /* FORNOW */
5503 gcc_assert (! nested_in_vect_loop);
5504
5505 /* Check if interleaving is supported. */
5506 if (!vect_strided_load_supported (vectype)
5507 && !PURE_SLP_STMT (stmt_info) && !slp)
5508 return false;
5509 }
5510
5511 if (!vec_stmt) /* transformation not required. */
5512 {
5513 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5514 vect_model_load_cost (stmt_info, ncopies, NULL);
5515 return true;
5516 }
5517
5518 if (vect_print_dump_info (REPORT_DETAILS))
5519 fprintf (vect_dump, "transform load.");
5520
5521 /** Transform. **/
5522
5523 if (strided_load)
5524 {
5525 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5526 /* Check if the chain of loads is already vectorized. */
5527 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
5528 {
5529 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5530 return true;
5531 }
5532 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5533 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5534 dr_chain = VEC_alloc (tree, heap, group_size);
5535
5536 /* VEC_NUM is the number of vect stmts to be created for this group. */
5537 if (slp)
5538 {
5539 strided_load = false;
5540 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5541 }
5542 else
5543 vec_num = group_size;
5544 }
5545 else
5546 {
5547 first_stmt = stmt;
5548 first_dr = dr;
5549 group_size = vec_num = 1;
5550 }
5551
5552 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5553 gcc_assert (alignment_support_scheme);
5554
5555 /* In case the vectorization factor (VF) is bigger than the number
5556 of elements that we can fit in a vectype (nunits), we have to generate
5557 more than one vector stmt - i.e., we need to "unroll" the
5558 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5559 from one copy of the vector stmt to the next, in the field
5560 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5561 stages to find the correct vector defs to be used when vectorizing
5562 stmts that use the defs of the current stmt. The example below illustrates
5563 the vectorization process when VF=16 and nunits=4 (i.e., we need to create
5564 4 vectorized stmts):
5565
5566 before vectorization:
5567 RELATED_STMT VEC_STMT
5568 S1: x = memref - -
5569 S2: z = x + 1 - -
5570
5571 step 1: vectorize stmt S1:
5572 We first create the vector stmt VS1_0, and, as usual, record a
5573 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
5574 Next, we create the vector stmt VS1_1, and record a pointer to
5575 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
5576 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
5577 stmts and pointers:
5578 RELATED_STMT VEC_STMT
5579 VS1_0: vx0 = memref0 VS1_1 -
5580 VS1_1: vx1 = memref1 VS1_2 -
5581 VS1_2: vx2 = memref2 VS1_3 -
5582 VS1_3: vx3 = memref3 - -
5583 S1: x = load - VS1_0
5584 S2: z = x + 1 - -
5585
5586 See in documentation in vect_get_vec_def_for_stmt_copy for how the
5587 information we recorded in RELATED_STMT field is used to vectorize
5588 stmt S2. */
5589
5590 /* In case of interleaving (non-unit strided access):
5591
5592 S1: x2 = &base + 2
5593 S2: x0 = &base
5594 S3: x1 = &base + 1
5595 S4: x3 = &base + 3
5596
5597 Vectorized loads are created in the order of memory accesses
5598 starting from the access of the first stmt of the chain:
5599
5600 VS1: vx0 = &base
5601 VS2: vx1 = &base + vec_size*1
5602 VS3: vx2 = &base + vec_size*2
5603 VS4: vx3 = &base + vec_size*3
5604
5605 Then permutation statements are generated:
5606
5607 VS5: vx4 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
5608 VS6: vx5 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
5609 ...
5610
5611 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5612 (the order of the data-refs in the output of vect_permute_load_chain
5613 corresponds to the order of scalar stmts in the interleaving chain - see
5614 the documentation of vect_permute_load_chain()).
5615 The generation of permutation stmts and recording them in
5616 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
5617
5618 In case of both multiple types and interleaving, the vector loads and
5619 permutation stmts above are created for every copy. The result vector stmts
5620 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5621 STMT_VINFO_RELATED_STMT for the next copies. */
5622
5623 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5624 on a target that supports unaligned accesses (dr_unaligned_supported)
5625 we generate the following code:
5626 p = initial_addr;
5627 indx = 0;
5628 loop {
5629 p = p + indx * vectype_size;
5630 vec_dest = *(p);
5631 indx = indx + 1;
5632 }
5633
5634 Otherwise, the data reference is potentially unaligned on a target that
5635 does not support unaligned accesses (dr_explicit_realign_optimized) -
5636 then generate the following code, in which the data in each iteration is
5637 obtained by two vector loads, one from the previous iteration, and one
5638 from the current iteration:
5639 p1 = initial_addr;
5640 msq_init = *(floor(p1))
5641 p2 = initial_addr + VS - 1;
5642 realignment_token = call target_builtin;
5643 indx = 0;
5644 loop {
5645 p2 = p2 + indx * vectype_size
5646 lsq = *(floor(p2))
5647 vec_dest = realign_load (msq, lsq, realignment_token)
5648 indx = indx + 1;
5649 msq = lsq;
5650 } */
5651
5652 /* If the misalignment remains the same throughout the execution of the
5653 loop, we can create the init_addr and permutation mask at the loop
5654 preheader. Otherwise, it needs to be created inside the loop.
5655 This can only occur when vectorizing memory accesses in the inner-loop
5656 nested within an outer-loop that is being vectorized. */
5657
5658 if (nested_in_vect_loop_p (loop, stmt)
5659 && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
5660 {
5661 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5662 compute_in_loop = true;
5663 }
5664
5665 if ((alignment_support_scheme == dr_explicit_realign_optimized
5666 || alignment_support_scheme == dr_explicit_realign)
5667 && !compute_in_loop)
5668 {
5669 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
5670 alignment_support_scheme, NULL_TREE,
5671 &at_loop);
5672 if (alignment_support_scheme == dr_explicit_realign_optimized)
5673 {
5674 phi = SSA_NAME_DEF_STMT (msq);
5675 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5676 }
5677 }
5678 else
5679 at_loop = loop;
5680
5681 prev_stmt_info = NULL;
5682 for (j = 0; j < ncopies; j++)
5683 {
5684 /* 1. Create the vector pointer update chain. */
5685 if (j == 0)
5686 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
5687 at_loop, offset,
5688 &dummy, &ptr_incr, false,
5689 NULL_TREE, &inv_p);
5690 else
5691 dataref_ptr =
5692 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
5693
5694 for (i = 0; i < vec_num; i++)
5695 {
5696 if (i > 0)
5697 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
5698 NULL_TREE);
5699
5700 /* 2. Create the vector-load in the loop. */
5701 switch (alignment_support_scheme)
5702 {
5703 case dr_aligned:
5704 gcc_assert (aligned_access_p (first_dr));
5705 data_ref = build_fold_indirect_ref (dataref_ptr);
5706 break;
5707 case dr_unaligned_supported:
5708 {
5709 int mis = DR_MISALIGNMENT (first_dr);
5710 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
5711
5712 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
5713 data_ref =
5714 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
5715 break;
5716 }
5717 case dr_explicit_realign:
5718 {
5719 tree ptr, bump;
5720 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5721
5722 if (compute_in_loop)
5723 msq = vect_setup_realignment (first_stmt, bsi,
5724 &realignment_token,
5725 dr_explicit_realign,
5726 dataref_ptr, NULL);
5727
5728 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5729 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5730 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5731 new_temp = make_ssa_name (vec_dest, new_stmt);
5732 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5733 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5734 copy_virtual_operands (new_stmt, stmt);
5735 mark_symbols_for_renaming (new_stmt);
5736 msq = new_temp;
5737
5738 bump = size_binop (MULT_EXPR, vs_minus_1,
5739 TYPE_SIZE_UNIT (scalar_type));
5740 ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
5741 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5742 break;
5743 }
5744 case dr_explicit_realign_optimized:
5745 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5746 break;
5747 default:
5748 gcc_unreachable ();
5749 }
5750 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5751 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5752 new_temp = make_ssa_name (vec_dest, new_stmt);
5753 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5754 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5755 mark_symbols_for_renaming (new_stmt);
5756
5757 /* 3. Handle explicit realignment if necessary/supported. Create in
5758 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
5759 if (alignment_support_scheme == dr_explicit_realign_optimized
5760 || alignment_support_scheme == dr_explicit_realign)
5761 {
5762 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
5763 if (!realignment_token)
5764 realignment_token = dataref_ptr;
5765 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5766 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
5767 realignment_token);
5768 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5769 new_temp = make_ssa_name (vec_dest, new_stmt);
5770 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5771 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5772
5773 if (alignment_support_scheme == dr_explicit_realign_optimized)
5774 {
5775 if (i == vec_num - 1 && j == ncopies - 1)
5776 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
5777 msq = lsq;
5778 }
5779 }
5780
5781 /* 4. Handle invariant-load. */
5782 if (inv_p)
5783 {
5784 gcc_assert (!strided_load);
5785 gcc_assert (nested_in_vect_loop_p (loop, stmt));
5786 if (j == 0)
5787 {
5788 int k;
5789 tree t = NULL_TREE;
5790 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
5791
5792 /* CHECKME: bitpos depends on endianness? */
5793 bitpos = bitsize_zero_node;
5794 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5795 bitsize, bitpos);
5796 BIT_FIELD_REF_UNSIGNED (vec_inv) =
5797 TYPE_UNSIGNED (scalar_type);
5798 vec_dest =
5799 vect_create_destination_var (scalar_dest, NULL_TREE);
5800 new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
5801 new_temp = make_ssa_name (vec_dest, new_stmt);
5802 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5803 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5804
5805 for (k = nunits - 1; k >= 0; --k)
5806 t = tree_cons (NULL_TREE, new_temp, t);
5807 /* FIXME: use build_constructor directly. */
5808 vec_inv = build_constructor_from_list (vectype, t);
5809 new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
5810 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5811 }
5812 else
5813 gcc_unreachable (); /* FORNOW. */
5814 }
5815
5816 /* Collect vector loads and later create their permutation in
5817 vect_transform_strided_load (). */
5818 if (strided_load)
5819 VEC_quick_push (tree, dr_chain, new_temp);
5820
5821 /* Store vector loads in the corresponding SLP_NODE. */
5822 if (slp)
5823 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5824 }
5825
5826 /* FORNOW: SLP with multiple types is unsupported. */
5827 if (slp)
5828 return true;
5829
5830 if (strided_load)
5831 {
5832 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
5833 return false;
5834 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5835 dr_chain = VEC_alloc (tree, heap, group_size);
5836 }
5837 else
5838 {
5839 if (j == 0)
5840 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5841 else
5842 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5843 prev_stmt_info = vinfo_for_stmt (new_stmt);
5844 }
5845 }
5846
5847 return true;
5848 }
5849
5850
5851 /* Function vectorizable_live_operation.
5852
5853 STMT computes a value that is used outside the loop. Check if
5854 it can be supported. */
5855
5856 bool
5857 vectorizable_live_operation (tree stmt,
5858 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
5859 tree *vec_stmt ATTRIBUTE_UNUSED)
5860 {
5861 tree operation;
5862 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5863 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5864 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5865 int i;
5866 int op_type;
5867 tree op;
5868 tree def, def_stmt;
5869 enum vect_def_type dt;
5870
5871 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5872
5873 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5874 return false;
5875
5876 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5877 return false;
5878
5879 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
5880 return false;
5881
5882 /* FORNOW. CHECKME. */
5883 if (nested_in_vect_loop_p (loop, stmt))
5884 return false;
5885
5886 operation = GIMPLE_STMT_OPERAND (stmt, 1);
5887 op_type = TREE_OPERAND_LENGTH (operation);
5888
5889 /* FORNOW: support only if all uses are invariant. This means
5890 that the scalar operations can remain in place, unvectorized.
5891 The original last scalar value that they compute will be used. */
5892
5893 for (i = 0; i < op_type; i++)
5894 {
5895 op = TREE_OPERAND (operation, i);
5896 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5897 {
5898 if (vect_print_dump_info (REPORT_DETAILS))
5899 fprintf (vect_dump, "use not simple.");
5900 return false;
5901 }
5902
5903 if (dt != vect_invariant_def && dt != vect_constant_def)
5904 return false;
5905 }
5906
5907 /* No transformation is required for the cases we currently support. */
5908 return true;
5909 }
5910
5911
5912 /* Function vect_is_simple_cond.
5913
5914 Input:
5915 LOOP - the loop that is being vectorized.
5916 COND - Condition that is checked for simple use.
5917
5918 Returns whether a COND can be vectorized. Checks whether
5919 condition operands are supportable using vect_is_simple_use. */
5920
5921 static bool
5922 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
5923 {
5924 tree lhs, rhs;
5925 tree def;
5926 enum vect_def_type dt;
5927
5928 if (!COMPARISON_CLASS_P (cond))
5929 return false;
5930
5931 lhs = TREE_OPERAND (cond, 0);
5932 rhs = TREE_OPERAND (cond, 1);
5933
5934 if (TREE_CODE (lhs) == SSA_NAME)
5935 {
5936 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5937 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
5938 return false;
5939 }
5940 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5941 && TREE_CODE (lhs) != FIXED_CST)
5942 return false;
5943
5944 if (TREE_CODE (rhs) == SSA_NAME)
5945 {
5946 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5947 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
5948 return false;
5949 }
5950 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5951 && TREE_CODE (rhs) != FIXED_CST)
5952 return false;
5953
5954 return true;
5955 }
5956
5957 /* vectorizable_condition.
5958
5959 Check if STMT is a conditional modify expression that can be vectorized.
5960 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5961 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5962 at BSI.
5963
5964 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
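/* For example (a hypothetical illustration with made-up SSA names), the
   scalar statement

     x_5 = a_3 < b_4 ? c_1 : d_2;

   is vectorized into

     vx_5 = VEC_COND_EXPR <va_3 < vb_4, vc_1, vd_2>;

   where the v* names denote the vector defs of the corresponding scalar
   operands.  */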
5965
5966 bool
5967 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
5968 {
5969 tree scalar_dest = NULL_TREE;
5970 tree vec_dest = NULL_TREE;
5971 tree op = NULL_TREE;
5972 tree cond_expr, then_clause, else_clause;
5973 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5974 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5975 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
5976 tree vec_compare, vec_cond_expr;
5977 tree new_temp;
5978 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5979 enum machine_mode vec_mode;
5980 tree def;
5981 enum vect_def_type dt;
5982 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5983 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5984
5985 gcc_assert (ncopies >= 1);
5986 if (ncopies > 1)
5987 return false; /* FORNOW */
5988
5989 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5990 return false;
5991
5992 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5993 return false;
5994
5995 /* FORNOW: SLP not supported. */
5996 if (STMT_SLP_TYPE (stmt_info))
5997 return false;
5998
5999 /* FORNOW: not yet supported. */
6000 if (STMT_VINFO_LIVE_P (stmt_info))
6001 {
6002 if (vect_print_dump_info (REPORT_DETAILS))
6003 fprintf (vect_dump, "value used after loop.");
6004 return false;
6005 }
6006
6007 /* Is vectorizable conditional operation? */
6008 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
6009 return false;
6010
6011 op = GIMPLE_STMT_OPERAND (stmt, 1);
6012
6013 if (TREE_CODE (op) != COND_EXPR)
6014 return false;
6015
6016 cond_expr = TREE_OPERAND (op, 0);
6017 then_clause = TREE_OPERAND (op, 1);
6018 else_clause = TREE_OPERAND (op, 2);
6019
6020 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
6021 return false;
6022
6023 /* We do not handle two different vector types for the condition
6024 and the values. */
6025 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
6026 return false;
6027
6028 if (TREE_CODE (then_clause) == SSA_NAME)
6029 {
6030 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6031 if (!vect_is_simple_use (then_clause, loop_vinfo,
6032 &then_def_stmt, &def, &dt))
6033 return false;
6034 }
6035 else if (TREE_CODE (then_clause) != INTEGER_CST
6036 && TREE_CODE (then_clause) != REAL_CST
6037 && TREE_CODE (then_clause) != FIXED_CST)
6038 return false;
6039
6040 if (TREE_CODE (else_clause) == SSA_NAME)
6041 {
6042 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6043 if (!vect_is_simple_use (else_clause, loop_vinfo,
6044 &else_def_stmt, &def, &dt))
6045 return false;
6046 }
6047 else if (TREE_CODE (else_clause) != INTEGER_CST
6048 && TREE_CODE (else_clause) != REAL_CST
6049 && TREE_CODE (else_clause) != FIXED_CST)
6050 return false;
6051
6052
6053 vec_mode = TYPE_MODE (vectype);
6054
6055 if (!vec_stmt)
6056 {
6057 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6058 return expand_vec_cond_expr_p (op, vec_mode);
6059 }
6060
6061 /* Transform */
6062
6063 /* Handle def. */
6064 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
6065 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6066
6067 /* Handle cond expr. */
6068 vec_cond_lhs =
6069 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
6070 vec_cond_rhs =
6071 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6072 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6073 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
6074
6075 /* Arguments are ready. Create the new vector stmt. */
6076 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6077 vec_cond_lhs, vec_cond_rhs);
6078 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6079 vec_compare, vec_then_clause, vec_else_clause);
6080
6081 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
6082 new_temp = make_ssa_name (vec_dest, *vec_stmt);
6083 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
6084 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
6085
6086 return true;
6087 }
6088
6089
6090 /* Function vect_transform_stmt.
6091
6092 Create a vectorized stmt to replace STMT, and insert it at BSI. */
6093
6094 static bool
6095 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
6096 slp_tree slp_node)
6097 {
6098 bool is_store = false;
6099 tree vec_stmt = NULL_TREE;
6100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6101 tree orig_stmt_in_pattern;
6102 bool done;
6103
6104 switch (STMT_VINFO_TYPE (stmt_info))
6105 {
6106 case type_demotion_vec_info_type:
6107 gcc_assert (!slp_node);
6108 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
6109 gcc_assert (done);
6110 break;
6111
6112 case type_promotion_vec_info_type:
6113 gcc_assert (!slp_node);
6114 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
6115 gcc_assert (done);
6116 break;
6117
6118 case type_conversion_vec_info_type:
6119 done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);
6120 gcc_assert (done);
6121 break;
6122
6123 case induc_vec_info_type:
6124 gcc_assert (!slp_node);
6125 done = vectorizable_induction (stmt, bsi, &vec_stmt);
6126 gcc_assert (done);
6127 break;
6128
6129 case op_vec_info_type:
6130 done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);
6131 gcc_assert (done);
6132 break;
6133
6134 case assignment_vec_info_type:
6135 done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);
6136 gcc_assert (done);
6137 break;
6138
6139 case load_vec_info_type:
6140 done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);
6141 gcc_assert (done);
6142 break;
6143
6144 case store_vec_info_type:
6145 done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
6146 gcc_assert (done);
6147 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6148 {
6149 /* In case of interleaving, the whole chain is vectorized when the
6150 last store in the chain is reached. Store stmts before the last
6151 one are skipped, and their vec_stmt_info shouldn't be freed
6152 meanwhile. */
6153 *strided_store = true;
6154 if (STMT_VINFO_VEC_STMT (stmt_info))
6155 is_store = true;
6156 }
6157 else
6158 is_store = true;
6159 break;
6160
6161 case condition_vec_info_type:
6162 gcc_assert (!slp_node);
6163 done = vectorizable_condition (stmt, bsi, &vec_stmt);
6164 gcc_assert (done);
6165 break;
6166
6167 case call_vec_info_type:
6168 gcc_assert (!slp_node);
6169 done = vectorizable_call (stmt, bsi, &vec_stmt);
6170 break;
6171
6172 case reduc_vec_info_type:
6173 gcc_assert (!slp_node);
6174 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
6175 gcc_assert (done);
6176 break;
6177
6178 default:
6179 if (!STMT_VINFO_LIVE_P (stmt_info))
6180 {
6181 if (vect_print_dump_info (REPORT_DETAILS))
6182 fprintf (vect_dump, "stmt not supported.");
6183 gcc_unreachable ();
6184 }
6185 }
6186
6187 if (STMT_VINFO_LIVE_P (stmt_info)
6188 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6189 {
6190 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
6191 gcc_assert (done);
6192 }
6193
6194 if (vec_stmt)
6195 {
6196 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6197 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6198 if (orig_stmt_in_pattern)
6199 {
6200 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6201 /* STMT was inserted by the vectorizer to replace a computation idiom.
6202 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6203 computed this idiom. We need to record a pointer to VEC_STMT in
6204 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6205 documentation of vect_pattern_recog. */
6206 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6207 {
6208 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6209 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6210 }
6211 }
6212 }
6213
6214 return is_store;
6215 }
6216
6217
6218 /* This function builds, on the loop preheader, ni_name = the number of
6219 iterations the loop executes. */
6220
6221 static tree
6222 vect_build_loop_niters (loop_vec_info loop_vinfo)
6223 {
6224 tree ni_name, stmt, var;
6225 edge pe;
6226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6227 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6228
6229 var = create_tmp_var (TREE_TYPE (ni), "niters");
6230 add_referenced_var (var);
6231 ni_name = force_gimple_operand (ni, &stmt, false, var);
6232
6233 pe = loop_preheader_edge (loop);
6234 if (stmt)
6235 {
6236 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6237 gcc_assert (!new_bb);
6238 }
6239
6240 return ni_name;
6241 }
6242
6243
6244 /* This function generates the following statements:
6245
6246 ni_name = number of iterations the loop executes
6247 ratio = ni_name / vf
6248 ratio_mult_vf_name = ratio * vf
6249
6250 and places them at the loop preheader edge. */
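/* For example (hypothetical numbers): with vf = 4 and ni_name = 1003,
   log_vf = 2, so ratio = 1003 >> 2 = 250 and ratio_mult_vf_name =
   250 << 2 = 1000; the remaining 3 iterations are left for the epilog
   loop created by vect_do_peeling_for_loop_bound.  */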
6251
6252 static void
6253 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6254 tree *ni_name_ptr,
6255 tree *ratio_mult_vf_name_ptr,
6256 tree *ratio_name_ptr)
6257 {
6258
6259 edge pe;
6260 basic_block new_bb;
6261 tree stmt, ni_name;
6262 tree var;
6263 tree ratio_name;
6264 tree ratio_mult_vf_name;
6265 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6266 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6267 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6268 tree log_vf;
6269
6270 pe = loop_preheader_edge (loop);
6271
6272 /* Generate a temporary variable that contains
6273 the number of iterations the loop executes. */
6274
6275 ni_name = vect_build_loop_niters (loop_vinfo);
6276 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
6277
6278 /* Create: ratio = ni >> log2(vf) */
6279
6280 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6281 if (!is_gimple_val (ratio_name))
6282 {
6283 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6284 add_referenced_var (var);
6285
6286 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
6287 pe = loop_preheader_edge (loop);
6288 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6289 gcc_assert (!new_bb);
6290 }
6291
6292 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6293
6294 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6295 ratio_name, log_vf);
6296 if (!is_gimple_val (ratio_mult_vf_name))
6297 {
6298 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6299 add_referenced_var (var);
6300
6301 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
6302 true, var);
6303 pe = loop_preheader_edge (loop);
6304 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6305 gcc_assert (!new_bb);
6306 }
6307
6308 *ni_name_ptr = ni_name;
6309 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6310 *ratio_name_ptr = ratio_name;
6311
6312 return;
6313 }
6314
6315
6316 /* Function vect_update_ivs_after_vectorizer.
6317
6318 "Advance" the induction variables of LOOP to the value they should take
6319 after the execution of LOOP. This is currently necessary because the
6320 vectorizer does not handle induction variables that are used after the
6321 loop. Such a situation occurs when the last iterations of LOOP are
6322 peeled, because:
6323 1. We introduced new uses after LOOP for IVs that were not originally used
6324 after LOOP: the IVs of LOOP are now used by an epilog loop.
6325 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6326 times, whereas the loop IVs should be bumped N times.
6327
6328 Input:
6329 - LOOP - a loop that is going to be vectorized. The last few iterations
6330 of LOOP were peeled.
6331 - NITERS - the number of iterations that LOOP executes (before it is
6332 vectorized), i.e., the number of times the ivs should be bumped.
6333 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6334 coming out from LOOP on which there are uses of the LOOP ivs
6335 (this is the path from LOOP->exit to epilog_loop->preheader).
6336
6337 The new definitions of the ivs are placed in LOOP->exit.
6338 The phi args associated with the edge UPDATE_E in the bb
6339 UPDATE_E->dest are updated accordingly.
6340
6341 Assumption 1: Like the rest of the vectorizer, this function assumes
6342 a single loop exit that has a single predecessor.
6343
6344 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6345 organized in the same order.
6346
6347 Assumption 3: The access function of the ivs is simple enough (see
6348 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6349
6350 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6351 coming out of LOOP on which the ivs of LOOP are used (this is the path
6352 that leads to the epilog loop; other paths skip the epilog loop). This
6353 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6354 needs to have its phis updated.
6355 */
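/* For example (hypothetical IV): for an induction variable with initial
   value 0 and step 4 in a loop with NITERS = 100, the new definition
   inserted at the loop exit is ni = 0 + 100 * 4 = 400, and the phi
   argument associated with UPDATE_E in UPDATE_E->dest is set to it.  */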
6356
6357 static void
6358 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6359 edge update_e)
6360 {
6361 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6362 basic_block exit_bb = single_exit (loop)->dest;
6363 tree phi, phi1;
6364 basic_block update_bb = update_e->dest;
6365
6366 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6367
6368 /* Make sure there exists a single-predecessor exit bb: */
6369 gcc_assert (single_pred_p (exit_bb));
6370
6371 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
6372 phi && phi1;
6373 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
6374 {
6375 tree access_fn = NULL;
6376 tree evolution_part;
6377 tree init_expr;
6378 tree step_expr;
6379 tree var, ni, ni_name;
6380 block_stmt_iterator last_bsi;
6381
6382 if (vect_print_dump_info (REPORT_DETAILS))
6383 {
6384 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6385 print_generic_expr (vect_dump, phi, TDF_SLIM);
6386 }
6387
6388 /* Skip virtual phis. */
6389 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6390 {
6391 if (vect_print_dump_info (REPORT_DETAILS))
6392 fprintf (vect_dump, "virtual phi. skip.");
6393 continue;
6394 }
6395
6396 /* Skip reduction phis. */
6397 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6398 {
6399 if (vect_print_dump_info (REPORT_DETAILS))
6400 fprintf (vect_dump, "reduc phi. skip.");
6401 continue;
6402 }
6403
6404 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6405 gcc_assert (access_fn);
6406 evolution_part =
6407 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6408 gcc_assert (evolution_part != NULL_TREE);
6409
6410 /* FORNOW: We do not support IVs whose evolution function is a polynomial
6411 of degree >= 2 or exponential. */
6412 gcc_assert (!tree_is_chrec (evolution_part));
6413
6414 step_expr = evolution_part;
6415 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
6416 loop->num));
6417
6418 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
6419 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
6420 init_expr,
6421 fold_convert (sizetype,
6422 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
6423 niters, step_expr)));
6424 else
6425 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
6426 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
6427 fold_convert (TREE_TYPE (init_expr),
6428 niters),
6429 step_expr),
6430 init_expr);
6431
6432
6433
6434 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
6435 add_referenced_var (var);
6436
6437 last_bsi = bsi_last (exit_bb);
6438 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
6439 true, BSI_SAME_STMT);
6440
6441 /* Fix phi expressions in the successor bb. */
6442 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
6443 }
6444 }
6445
6446
6447 /* Function vect_do_peeling_for_loop_bound
6448
6449 Peel the last iterations of the loop represented by LOOP_VINFO.
6450 The peeled iterations form a new epilog loop. Given that the loop now
6451 iterates NITERS times, the new epilog loop iterates
6452 NITERS % VECTORIZATION_FACTOR times.
6453
6454 The original loop will later be made to iterate
6455 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
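/* For example (hypothetical numbers): with NITERS = 25 and a vectorization
   factor of 8, the vectorized loop executes RATIO = 3 iterations (covering
   24 scalar iterations) and the peeled epilog loop executes the remaining
   25 % 8 = 1 iteration.  */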
6456
6457 static void
6458 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
6459 {
6460 tree ni_name, ratio_mult_vf_name;
6461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6462 struct loop *new_loop;
6463 edge update_e;
6464 basic_block preheader;
6465 int loop_num;
6466 unsigned int th;
6467 int min_scalar_loop_bound;
6468 int min_profitable_iters;
6469
6470 if (vect_print_dump_info (REPORT_DETAILS))
6471 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
6472
6473 initialize_original_copy_tables ();
6474
6475 /* Generate the following variables on the preheader of the original loop:
6476
6477 ni_name = number of iterations the original loop executes
6478 ratio = ni_name / vf
6479 ratio_mult_vf_name = ratio * vf */
6480 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
6481 &ratio_mult_vf_name, ratio);
6482
6483 loop_num = loop->num;
6484
6485 /* Analyze cost to set the threshold for the vectorized loop. */
6486 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
6487 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
6488 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
6489
6490 /* Use the cost model only if it is more conservative than user specified
6491 threshold. */
6492
6493 th = (unsigned) min_scalar_loop_bound;
6494 if (min_profitable_iters
6495 && (!min_scalar_loop_bound
6496 || min_profitable_iters > min_scalar_loop_bound))
6497 th = (unsigned) min_profitable_iters;
6498
6499 if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
6500 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6501 && vect_print_dump_info (REPORT_DETAILS))
6502 fprintf (vect_dump, "vectorization may not be profitable.");
6503
6504 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
6505 ratio_mult_vf_name, ni_name, false,
6506 th);
6507 gcc_assert (new_loop);
6508 gcc_assert (loop_num == loop->num);
6509 #ifdef ENABLE_CHECKING
6510 slpeel_verify_cfg_after_peeling (loop, new_loop);
6511 #endif
6512
6513 /* A guard that controls whether the new_loop is to be executed or skipped
6514 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
6515 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
6516 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
6517 is on the path where the LOOP IVs are used and need to be updated. */
6518
6519 preheader = loop_preheader_edge (new_loop)->src;
6520 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
6521 update_e = EDGE_PRED (preheader, 0);
6522 else
6523 update_e = EDGE_PRED (preheader, 1);
6524
6525 /* Update IVs of original loop as if they were advanced
6526 by ratio_mult_vf_name steps. */
6527 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
6528
6529 /* After peeling we have to reset scalar evolution analyzer. */
6530 scev_reset ();
6531
6532 free_original_copy_tables ();
6533 }
6534
6535
6536 /* Function vect_gen_niters_for_prolog_loop
6537
6538 Set the number of iterations for the loop represented by LOOP_VINFO
6539 to the minimum between LOOP_NITERS (the original iteration count of the loop)
6540 and the number of iterations needed to align DR - the data reference recorded in
6541 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
6542 this loop, the data reference DR will refer to an aligned location.
6543
6544 The following computation is generated:
6545
6546 If the misalignment of DR is known at compile time:
6547 addr_mis = int mis = DR_MISALIGNMENT (dr);
6548 Else, compute address misalignment in bytes:
6549 addr_mis = addr & (vectype_size - 1)
6550
6551 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
6552
6553 (elem_size = element type size; an element is the scalar element
6554 whose type is the inner type of the vectype)
6555
6556 For interleaving,
6557
6558 prolog_niters = min ( LOOP_NITERS ,
6559 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
6560 where group_size is the size of the interleaved group.
6561
6562 The above formulas assume that VF == number of elements in the vector. This
6563 may not hold when there are multiple-types in the loop.
6564 In this case, for some data-references in the loop the VF does not represent
6565 the number of elements that fit in the vector. Therefore, instead of VF we
6566 use TYPE_VECTOR_SUBPARTS. */
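/* For example (hypothetical numbers): with VF = 4, a 4-byte element type,
   group_size = 1 and a byte misalignment of 8, addr_mis/elem_size = 2 and
   prolog_niters = min (LOOP_NITERS, (4 - 2) & 3) = min (LOOP_NITERS, 2).
   If the access is already aligned, the mask gives (4 - 0) & 3 = 0, so no
   prolog iterations are needed.  */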
6567
6568 static tree
6569 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
6570 {
6571 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
6572 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6573 tree var, stmt;
6574 tree iters, iters_name;
6575 edge pe;
6576 basic_block new_bb;
6577 tree dr_stmt = DR_STMT (dr);
6578 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
6579 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6580 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
6581 tree niters_type = TREE_TYPE (loop_niters);
6582 int group_size = 1;
6583 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
6584 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
6585
6586 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6587 {
6588 /* For an interleaved access the element size must be multiplied by the size
6589 of the interleaved group. */
6590 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
6591 DR_GROUP_FIRST_DR (stmt_info)));
6592 element_size *= group_size;
6593 }
6594
6595 pe = loop_preheader_edge (loop);
6596
6597 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
6598 {
6599 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
6600 int elem_misalign = byte_misalign / element_size;
6601
6602 if (vect_print_dump_info (REPORT_DETAILS))
6603 fprintf (vect_dump, "known misalignment = %d.", byte_misalign);
6604 iters = build_int_cst (niters_type,
6605 (nelements - elem_misalign)&(nelements/group_size-1));
6606 }
6607 else
6608 {
6609 tree new_stmts = NULL_TREE;
6610 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
6611 &new_stmts, NULL_TREE, loop);
6612 tree ptr_type = TREE_TYPE (start_addr);
6613 tree size = TYPE_SIZE (ptr_type);
6614 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
6615 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
6616 tree elem_size_log =
6617 build_int_cst (type, exact_log2 (vectype_align/nelements));
6618 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
6619 tree nelements_tree = build_int_cst (type, nelements);
6620 tree byte_misalign;
6621 tree elem_misalign;
6622
6623 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
6624 gcc_assert (!new_bb);
6625
6626 /* Create: byte_misalign = addr & (vectype_size - 1) */
6627 byte_misalign =
6628 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
6629
6630 /* Create: elem_misalign = byte_misalign / element_size */
6631 elem_misalign =
6632 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
6633
6634 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
6635 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
6636 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
6637 iters = fold_convert (niters_type, iters);
6638 }
6639
6640 /* Create: prolog_loop_niters = min (iters, loop_niters) */
6641 /* If the loop bound is known at compile time we already verified that it is
6642 greater than VF; since the prolog iteration count ('iters') is at most VF,
6643 there's no need to generate the MIN_EXPR in this case. */
6644 if (TREE_CODE (loop_niters) != INTEGER_CST)
6645 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
6646
6647 if (vect_print_dump_info (REPORT_DETAILS))
6648 {
6649 fprintf (vect_dump, "niters for prolog loop: ");
6650 print_generic_expr (vect_dump, iters, TDF_SLIM);
6651 }
6652
6653 var = create_tmp_var (niters_type, "prolog_loop_niters");
6654 add_referenced_var (var);
6655 iters_name = force_gimple_operand (iters, &stmt, false, var);
6656
6657 /* Insert stmt on loop preheader edge. */
6658 if (stmt)
6659 {
6660 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6661 gcc_assert (!new_bb);
6662 }
6663
6664 return iters_name;
6665 }
6666
6667
6668 /* Function vect_update_init_of_dr
6669
6670 NITERS iterations were peeled from LOOP. DR represents a data reference
6671 in LOOP. This function updates the information recorded in DR to
6672 account for the fact that the first NITERS iterations had already been
6673 executed. Specifically, it updates the OFFSET field of DR. */
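/* For instance (a hypothetical illustration): if NITERS is 3 and DR_STEP is
   4 bytes, DR_OFFSET is advanced by 3 * 4 = 12 bytes, which is exactly the
   distance covered by the peeled iterations.  */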
6674
6675 static void
6676 vect_update_init_of_dr (struct data_reference *dr, tree niters)
6677 {
6678 tree offset = DR_OFFSET (dr);
6679
6680 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
6681 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
6682 DR_OFFSET (dr) = offset;
6683 }
6684
6685
6686 /* Function vect_update_inits_of_drs
6687
6688 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
6689 This function updates the information recorded for the data references in
6690 the loop to account for the fact that the first NITERS iterations had
6691 already been executed. Specifically, it updates the initial_condition of
6692 the access_function of all the data_references in the loop. */
6693
6694 static void
6695 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
6696 {
6697 unsigned int i;
6698 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
6699 struct data_reference *dr;
6700
6701 if (vect_print_dump_info (REPORT_DETAILS))
6702 fprintf (vect_dump, "=== vect_update_inits_of_drs ===");
6703
6704 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
6705 vect_update_init_of_dr (dr, niters);
6706 }
6707
6708
6709 /* Function vect_do_peeling_for_alignment
6710
6711 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
6712 'niters' is computed from the misalignment of one of the data references in
6713 the loop, thereby forcing that reference to an aligned location at the
6714 beginning of the execution of this loop. The data reference for which we
6715 are peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
6716
6717 static void
6718 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
6719 {
6720 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6721 tree niters_of_prolog_loop, ni_name;
6722 tree n_iters;
6723 struct loop *new_loop;
6724
6725 if (vect_print_dump_info (REPORT_DETAILS))
6726 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
6727
6728 initialize_original_copy_tables ();
6729
6730 ni_name = vect_build_loop_niters (loop_vinfo);
6731 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
6732
6733 /* Peel the prolog loop and iterate it niters_of_prolog_loop times. */
6734 new_loop =
6735 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
6736 niters_of_prolog_loop, ni_name, true, 0);
6737 gcc_assert (new_loop);
6738 #ifdef ENABLE_CHECKING
6739 slpeel_verify_cfg_after_peeling (new_loop, loop);
6740 #endif
6741
6742 /* Update the number of times the loop executes. */
6743 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
6744 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
6745 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
6746
6747 /* Update the init conditions of the access functions of all data refs. */
6748 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
6749
6750 /* After peeling we have to reset the scalar evolution analyzer. */
6751 scev_reset ();
6752
6753 free_original_copy_tables ();
6754 }
6755
6756
6757 /* Function vect_create_cond_for_align_checks.
6758
6759 Create a conditional expression that represents the alignment checks for
6760 all of the data references (array element references) whose alignment must
6761 be checked at runtime.
6762
6763 Input:
6764 LOOP_VINFO - two fields of the loop information are used.
6765 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
6766 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
6767
6768 Output:
6769 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6770 expression.
6771 The returned value is the conditional expression to be used in the if
6772 statement that controls which version of the loop gets executed at runtime.
6773
6774 The algorithm makes two assumptions:
6775 1) The number of bytes "n" in a vector is a power of 2.
6776 2) An address "a" is aligned if a%n is zero, so this
6777 test can be done as a&(n-1) == 0. For example, for 16
6778 byte vectors the test is a&0xf == 0. */
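/* A minimal sketch of the sequence this function emits, assuming two
   hypothetical may-misalign references whose first-vector addresses are P0
   and P1, and 16-byte vectors (LOOP_VINFO_PTR_MASK == 0xf); the temporary
   names mirror the ones created below:

       addr2int0 = (signed-int-of-pointer-size) P0;
       addr2int1 = (signed-int-of-pointer-size) P1;
       orptrs1 = addr2int0 | addr2int1;
       andmask = orptrs1 & 0xf;
       return (andmask == 0);   <- true iff all checked addresses are aligned

   The vectorized loop version is taken only when this condition is true.  */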
6779
6780 static tree
6781 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
6782 tree *cond_expr_stmt_list)
6783 {
6784 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6785 VEC(tree,heap) *may_misalign_stmts
6786 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
6787 tree ref_stmt, tmp;
6788 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
6789 tree mask_cst;
6790 unsigned int i;
6791 tree psize;
6792 tree int_ptrsize_type;
6793 char tmp_name[20];
6794 tree or_tmp_name = NULL_TREE;
6795 tree and_tmp, and_tmp_name, and_stmt;
6796 tree ptrsize_zero;
6797
6798 /* Check that mask is one less than a power of 2, i.e., mask is
6799 all zeros followed by all ones. */
6800 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
6801
6802 /* CHECKME: what is the best integer or unsigned type to use to hold a
6803 cast from a pointer value? */
6804 psize = TYPE_SIZE (ptr_type_node);
6805 int_ptrsize_type
6806 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
6807
6808 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
6809 of the first vector of the i'th data reference. */
6810
6811 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
6812 {
6813 tree new_stmt_list = NULL_TREE;
6814 tree addr_base;
6815 tree addr_tmp, addr_tmp_name, addr_stmt;
6816 tree or_tmp, new_or_tmp_name, or_stmt;
6817
6818 /* create: addr_tmp = (int)(address_of_first_vector) */
6819 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
6820 &new_stmt_list, NULL_TREE, loop);
6821
6822 if (new_stmt_list != NULL_TREE)
6823 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
6824
6825 sprintf (tmp_name, "%s%d", "addr2int", i);
6826 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6827 add_referenced_var (addr_tmp);
6828 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
6829 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
6830 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
6831 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
6832 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
6833
6834 /* The addresses are ORed together. */
6835
6836 if (or_tmp_name != NULL_TREE)
6837 {
6838 /* create: or_tmp = or_tmp | addr_tmp */
6839 sprintf (tmp_name, "%s%d", "orptrs", i);
6840 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6841 add_referenced_var (or_tmp);
6842 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
6843 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
6844 or_tmp_name, addr_tmp_name);
6845 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
6846 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
6847 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
6848 or_tmp_name = new_or_tmp_name;
6849 }
6850 else
6851 or_tmp_name = addr_tmp_name;
6852
6853 } /* end for i */
6854
6855 mask_cst = build_int_cst (int_ptrsize_type, mask);
6856
6857 /* create: and_tmp = or_tmp & mask */
6858 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
6859 add_referenced_var (and_tmp);
6860 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
6861
6862 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
6863 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
6864 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
6865 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
6866
6867 /* Make and_tmp the left operand of the conditional test against zero.
6868 If and_tmp has a nonzero bit then some address is unaligned. */
6869 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
6870 return build2 (EQ_EXPR, boolean_type_node,
6871 and_tmp_name, ptrsize_zero);
6872 }
6873
6874 /* Function vect_vfa_segment_size.
6875
6876 Create an expression that computes the size of the segment
6877 that will be accessed for a data reference. The function takes into
6878 account that realignment loads may access one more vector.
6879
6880 Input:
6881 DR: The data reference.
6882 VECT_FACTOR: vectorization factor.
6883
6884 Return an expression whose value is the size of the segment which will be
6885 accessed by DR. */
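/* Illustrative example (hypothetical values): for a data reference with
   DR_STEP = 4 bytes and a vectorization factor of 4, the segment accessed
   in one vector iteration is 4 * 4 = 16 bytes; if the access uses the
   optimized explicit-realignment scheme and the vector type is 16 bytes
   wide, one extra vector is counted, giving 32 bytes.  */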
6886
6887 static tree
6888 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
6889 {
6890 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
6891 DR_STEP (dr), vect_factor);
6892
6893 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
6894 {
6895 tree vector_size = TYPE_SIZE_UNIT
6896 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
6897
6898 segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
6899 segment_length, vector_size);
6900 }
6901 return fold_convert (sizetype, segment_length);
6902 }
6903
6904 /* Function vect_create_cond_for_alias_checks.
6905
6906 Create a conditional expression that represents the run-time checks for
6907 overlapping of the address ranges represented by a list of data reference
6908 relations passed as input.
6909
6910 Input:
6911 COND_EXPR - input conditional expression. New conditions will be chained
6912 with a logical AND operation.
6913 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
6914 to be checked.
6915
6916 Output:
6917 COND_EXPR - conditional expression.
6918 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6919 expression.
6920
6921
6922 The resulting COND_EXPR is the conditional expression to be used in the if
6923 statement that controls which version of the loop gets executed at runtime.
6924 */
6925
6926 static void
6927 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
6928 tree * cond_expr,
6929 tree * cond_expr_stmt_list)
6930 {
6931 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6932 VEC (ddr_p, heap) * may_alias_ddrs =
6933 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
6934 tree vect_factor =
6935 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
6936
6937 ddr_p ddr;
6938 unsigned int i;
6939 tree part_cond_expr;
6940
6941 /* Create expression
6942 (((store_ptr_0 + store_segment_length_0) < load_ptr_0)
6943 || ((load_ptr_0 + load_segment_length_0) < store_ptr_0))
6944 &&
6945 ...
6946 &&
6947 (((store_ptr_n + store_segment_length_n) < load_ptr_n)
6948 || ((load_ptr_n + load_segment_length_n) < store_ptr_n)) */
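/* As a hedged illustration (hypothetical arrays, not taken from the code
   below): for a single pair accessing arrays A and B with 16-byte segments,
   the generated condition has the shape

       (&A[0] + 16 < &B[0]) || (&B[0] + 16 < &A[0])

   i.e. the two segments must be disjoint; note the conservative use of
   strict '<' on the segment end addresses.  */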
6949
6950 if (VEC_empty (ddr_p, may_alias_ddrs))
6951 return;
6952
6953 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
6954 {
6955 struct data_reference *dr_a, *dr_b;
6956 tree dr_group_first_a, dr_group_first_b;
6957 tree addr_base_a, addr_base_b;
6958 tree segment_length_a, segment_length_b;
6959 tree stmt_a, stmt_b;
6960
6961 dr_a = DDR_A (ddr);
6962 stmt_a = DR_STMT (DDR_A (ddr));
6963 dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
6964 if (dr_group_first_a)
6965 {
6966 stmt_a = dr_group_first_a;
6967 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
6968 }
6969
6970 dr_b = DDR_B (ddr);
6971 stmt_b = DR_STMT (DDR_B (ddr));
6972 dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
6973 if (dr_group_first_b)
6974 {
6975 stmt_b = dr_group_first_b;
6976 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
6977 }
6978
6979 addr_base_a =
6980 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
6981 NULL_TREE, loop);
6982 addr_base_b =
6983 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
6984 NULL_TREE, loop);
6985
6986 segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
6987 segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
6988
6989 if (vect_print_dump_info (REPORT_DR_DETAILS))
6990 {
6991 fprintf (vect_dump,
6992 "create runtime check for data references ");
6993 print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
6994 fprintf (vect_dump, " and ");
6995 print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
6996 }
6997
6998
6999 part_cond_expr =
7000 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
7001 fold_build2 (LT_EXPR, boolean_type_node,
7002 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
7003 addr_base_a,
7004 segment_length_a),
7005 addr_base_b),
7006 fold_build2 (LT_EXPR, boolean_type_node,
7007 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
7008 addr_base_b,
7009 segment_length_b),
7010 addr_base_a));
7011
7012 if (*cond_expr)
7013 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
7014 *cond_expr, part_cond_expr);
7015 else
7016 *cond_expr = part_cond_expr;
7017 }
7018 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7019 fprintf (vect_dump, "created versioning for %u alias checks.\n",
7020 VEC_length (ddr_p, may_alias_ddrs));
7021
7022 }
7023
7024 /* Function vect_loop_versioning.
7025
7026 If the loop has data references that may or may not be aligned and/or
7027 has data reference relations whose independence was not proven, then
7028 two versions of the loop need to be generated, one which is vectorized
7029 and one which isn't. A test is then generated to control which of the
7030 loops is executed. The test checks the alignment of all of the
7031 data references that may or may not be aligned. An additional
7032 sequence of runtime tests is generated for each pair of DDRs whose
7033 independence was not proven. The vectorized version of the loop is
7034 executed only if both the alias and the alignment tests pass. */
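/* Schematically (a sketch of the resulting control flow, not literal
   generated code):

       if (alignment_checks_pass && alias_checks_pass)
         ... vectorized version of the loop ...
       else
         ... original scalar version of the loop ...  */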
7035
7036 static void
7037 vect_loop_versioning (loop_vec_info loop_vinfo)
7038 {
7039 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7040 struct loop *nloop;
7041 tree cond_expr = NULL_TREE;
7042 tree cond_expr_stmt_list = NULL_TREE;
7043 basic_block condition_bb;
7044 block_stmt_iterator cond_exp_bsi;
7045 basic_block merge_bb;
7046 basic_block new_exit_bb;
7047 edge new_exit_e, e;
7048 tree orig_phi, new_phi, arg;
7049 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7050 tree gimplify_stmt_list;
7051
7052 if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7053 && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7054 return;
7055
7056 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
7057 cond_expr =
7058 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
7059
7060 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7061 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, &cond_expr_stmt_list);
7062
7063 cond_expr =
7064 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7065 cond_expr =
7066 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
7067 NULL_TREE);
7068 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
7069
7070 initialize_original_copy_tables ();
7071 nloop = loop_version (loop, cond_expr, &condition_bb,
7072 prob, prob, REG_BR_PROB_BASE - prob, true);
7073 free_original_copy_tables();
7074
7075 /* Loop versioning violates an assumption we try to maintain during
7076 vectorization - that the loop exit block has a single predecessor.
7077 After versioning, the exit block of both loop versions is the same
7078 basic block (i.e. it has two predecessors). To simplify the
7079 following transformations in the vectorizer, we fix this situation
7080 here by adding a new (empty) block on the exit edge of the loop,
7081 with the proper loop-exit phis to maintain loop-closed form. */
7082
7083 merge_bb = single_exit (loop)->dest;
7084 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7085 new_exit_bb = split_edge (single_exit (loop));
7086 new_exit_e = single_exit (loop);
7087 e = EDGE_SUCC (new_exit_bb, 0);
7088
7089 for (orig_phi = phi_nodes (merge_bb); orig_phi;
7090 orig_phi = PHI_CHAIN (orig_phi))
7091 {
7092 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7093 new_exit_bb);
7094 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7095 add_phi_arg (new_phi, arg, new_exit_e);
7096 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7097 }
7098
7099 /* End loop-exit-fixes after versioning. */
7100
7101 update_ssa (TODO_update_ssa);
7102 if (cond_expr_stmt_list)
7103 {
7104 cond_exp_bsi = bsi_last (condition_bb);
7105 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
7106 }
7107 }
7108
7109 /* Remove a group of stores (for SLP or interleaving) and free their
7110 stmt_vec_info. */
7111
7112 static void
7113 vect_remove_stores (tree first_stmt)
7114 {
7115 stmt_ann_t ann;
7116 tree next = first_stmt;
7117 tree tmp;
7118 stmt_vec_info next_stmt_info;
7119 block_stmt_iterator next_si;
7120
7121 while (next)
7122 {
7123 /* Free the attached stmt_vec_info and remove the stmt. */
7124 next_si = bsi_for_stmt (next);
7125 bsi_remove (&next_si, true);
7126 next_stmt_info = vinfo_for_stmt (next);
7127 ann = stmt_ann (next);
7128 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7129 free (next_stmt_info);
7130 set_stmt_info (ann, NULL);
7131 next = tmp;
7132 }
7133 }
7134
7135
7136 /* Vectorize SLP instance tree in postorder. */
7137
7138 static bool
7139 vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
7140 {
7141 tree stmt;
7142 bool strided_store, is_store;
7143 block_stmt_iterator si;
7144 stmt_vec_info stmt_info;
7145
7146 if (!node)
7147 return false;
7148
7149 vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
7150 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
7151
7152 stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0);
7153 stmt_info = vinfo_for_stmt (stmt);
7154 SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
7155 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7156
7157 if (vect_print_dump_info (REPORT_DETAILS))
7158 {
7159 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7160 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7161 }
7162
7163 si = bsi_for_stmt (stmt);
7164 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7165 if (is_store)
7166 {
7167 if (DR_GROUP_FIRST_DR (stmt_info))
7168 /* If IS_STORE is TRUE, the vectorization of the
7169 interleaving chain was completed - free all the stores in
7170 the chain. */
7171 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7172 else
7173 /* FORNOW: SLP originates only from strided stores. */
7174 gcc_unreachable ();
7175
7176 return true;
7177 }
7178
7179 /* FORNOW: SLP originates only from strided stores. */
7180 return false;
7181 }
7182
7183
7184 static bool
7185 vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
7186 {
7187 VEC (slp_instance, heap) *slp_instances =
7188 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7189 slp_instance instance;
7190 unsigned int vec_stmts_size;
7191 unsigned int group_size, i;
7192 unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7193 bool is_store = false;
7194
7195 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7196 {
7197 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
7198 /* For each SLP instance calculate the number of vector stmts to be created
7199 for the scalar stmts in each node of the SLP tree. The number of vector
7200 elements in one vector iteration is the number of scalar elements in
7201 one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
7202 vector size. */
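/* For instance (hypothetical numbers): with GROUP_SIZE = 2, VF = 4 and
   NUNITS = 4, each node gets 4 * 2 / 4 = 2 vector stmts.  */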
7203 vec_stmts_size = vectorization_factor * group_size / nunits;
7204
7205 /* Schedule the tree of INSTANCE. */
7206 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
7207 vec_stmts_size);
7208
7209 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7210 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7211 fprintf (vect_dump, "vectorizing stmts using SLP.");
7212 }
7213
7214 return is_store;
7215 }
7216
7217 /* Function vect_transform_loop.
7218
7219 The analysis phase has determined that the loop is vectorizable.
7220 Vectorize the loop - create vectorized stmts to replace the scalar
7221 stmts in the loop, and update the loop exit condition. */
7222
7223 void
7224 vect_transform_loop (loop_vec_info loop_vinfo)
7225 {
7226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7227 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7228 int nbbs = loop->num_nodes;
7229 block_stmt_iterator si, next_si;
7230 int i;
7231 tree ratio = NULL;
7232 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7233 bool strided_store;
7234 bool slp_scheduled = false;
7235 unsigned int nunits;
7236
7237 if (vect_print_dump_info (REPORT_DETAILS))
7238 fprintf (vect_dump, "=== vect_transform_loop ===");
7239 vect_loop_versioning (loop_vinfo);
7240
7241 /* CHECKME: we wouldn't need this if we called update_ssa once
7242 for all loops. */
7243 bitmap_zero (vect_memsyms_to_rename);
7244
7245 /* Peel the loop if there are data refs with unknown alignment.
7246 Only one data ref with unknown alignment is allowed. */
7247
7248 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7249 vect_do_peeling_for_alignment (loop_vinfo);
7250
7251 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7252 compile-time constant), or it is a constant that is not divisible by the
7253 vectorization factor, then an epilog loop needs to be created.
7254 We therefore duplicate the loop: the original loop will be vectorized,
7255 and will compute the first (n/VF) iterations. The second copy of the loop
7256 will remain scalar and will compute the remaining (n%VF) iterations.
7257 (VF is the vectorization factor). */
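/* For example (hypothetical numbers): with n = 1003 and VF = 4, the
   vectorized loop executes 250 vector iterations covering the first 1000
   scalar iterations, and the scalar epilog loop executes the remaining
   1003 % 4 = 3 iterations.  */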
7258
7259 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7260 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7261 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
7262 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
7263 else
7264 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7265 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7266
7267 /* 1) Make sure the loop header has exactly two entries
7268 2) Make sure we have a preheader basic block. */
7269
7270 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7271
7272 split_edge (loop_preheader_edge (loop));
7273
7274 /* FORNOW: the vectorizer supports only loops whose body consists
7275 of one basic block (header + empty latch). When the vectorizer
7276 supports more involved loop forms, the order in which the BBs are
7277 traversed will need to be reconsidered. */
7278
7279 for (i = 0; i < nbbs; i++)
7280 {
7281 basic_block bb = bbs[i];
7282 stmt_vec_info stmt_info;
7283 tree phi;
7284
7285 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
7286 {
7287 if (vect_print_dump_info (REPORT_DETAILS))
7288 {
7289 fprintf (vect_dump, "------>vectorizing phi: ");
7290 print_generic_expr (vect_dump, phi, TDF_SLIM);
7291 }
7292 stmt_info = vinfo_for_stmt (phi);
7293 if (!stmt_info)
7294 continue;
7295
7296 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7297 && !STMT_VINFO_LIVE_P (stmt_info))
7298 continue;
7299
7300 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7301 != (unsigned HOST_WIDE_INT) vectorization_factor)
7302 && vect_print_dump_info (REPORT_DETAILS))
7303 fprintf (vect_dump, "multiple-types.");
7304
7305 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7306 {
7307 if (vect_print_dump_info (REPORT_DETAILS))
7308 fprintf (vect_dump, "transform phi.");
7309 vect_transform_stmt (phi, NULL, NULL, NULL);
7310 }
7311 }
7312
7313 for (si = bsi_start (bb); !bsi_end_p (si);)
7314 {
7315 tree stmt = bsi_stmt (si);
7316 bool is_store;
7317
7318 if (vect_print_dump_info (REPORT_DETAILS))
7319 {
7320 fprintf (vect_dump, "------>vectorizing statement: ");
7321 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7322 }
7323
7324 stmt_info = vinfo_for_stmt (stmt);
7325
7326 /* Vector stmts created in the outer loop during vectorization of
7327 stmts in an inner loop may not have a stmt_info, and do not
7328 need to be vectorized. */
7329 if (!stmt_info)
7330 {
7331 bsi_next (&si);
7332 continue;
7333 }
7334
7335 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7336 && !STMT_VINFO_LIVE_P (stmt_info))
7337 {
7338 bsi_next (&si);
7339 continue;
7340 }
7341
7342 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
7343 nunits =
7344 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7345 if (!STMT_SLP_TYPE (stmt_info)
7346 && nunits != (unsigned int) vectorization_factor
7347 && vect_print_dump_info (REPORT_DETAILS))
7348 /* For SLP, VF is set according to the unrolling factor, and not to
7349 the vector size; hence for SLP this print is not valid. */
7350 fprintf (vect_dump, "multiple-types.");
7351
7352 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7353 reached. */
7354 if (STMT_SLP_TYPE (stmt_info))
7355 {
7356 if (!slp_scheduled)
7357 {
7358 slp_scheduled = true;
7359
7360 if (vect_print_dump_info (REPORT_DETAILS))
7361 fprintf (vect_dump, "=== scheduling SLP instances ===");
7362
7363 is_store = vect_schedule_slp (loop_vinfo, nunits);
7364
7365 /* IS_STORE is true if STMT is a store. Stores cannot be of
7366 hybrid SLP type. They are removed in
7367 vect_schedule_slp_instance and their vinfo is destroyed. */
7368 if (is_store)
7369 {
7370 bsi_next (&si);
7371 continue;
7372 }
7373 }
7374
7375 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7376 if (PURE_SLP_STMT (stmt_info))
7377 {
7378 bsi_next (&si);
7379 continue;
7380 }
7381 }
7382
7383 /* -------- vectorize statement ------------ */
7384 if (vect_print_dump_info (REPORT_DETAILS))
7385 fprintf (vect_dump, "transform statement.");
7386
7387 strided_store = false;
7388 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
7389 if (is_store)
7390 {
7391 stmt_ann_t ann;
7392 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
7393 {
7394 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7395 interleaving chain was completed - free all the stores in
7396 the chain. */
7397 tree next = DR_GROUP_FIRST_DR (stmt_info);
7398 tree tmp;
7399 stmt_vec_info next_stmt_info;
7400
7401 while (next)
7402 {
7403 next_si = bsi_for_stmt (next);
7404 next_stmt_info = vinfo_for_stmt (next);
7405 /* Free the attached stmt_vec_info and remove the stmt. */
7406 ann = stmt_ann (next);
7407 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7408 free (next_stmt_info);
7409 set_stmt_info (ann, NULL);
7410 bsi_remove (&next_si, true);
7411 next = tmp;
7412 }
7413 bsi_remove (&si, true);
7414 continue;
7415 }
7416 else
7417 {
7418 /* Free the attached stmt_vec_info and remove the stmt. */
7419 ann = stmt_ann (stmt);
7420 free (stmt_info);
7421 set_stmt_info (ann, NULL);
7422 bsi_remove (&si, true);
7423 continue;
7424 }
7425 }
7426 bsi_next (&si);
7427 } /* stmts in BB */
7428 } /* BBs in loop */
7429
7430 slpeel_make_loop_iterate_ntimes (loop, ratio);
7431
7432 mark_set_for_renaming (vect_memsyms_to_rename);
7433
7434 /* The memory tags and pointers in vectorized statements need to
7435 have their SSA forms updated. FIXME, why can't this be delayed
7436 until all the loops have been transformed? */
7437 update_ssa (TODO_update_ssa);
7438
7439 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7440 fprintf (vect_dump, "LOOP VECTORIZED.");
7441 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7442 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
7443 }