/* Transformation Utilities for Loop Vectorization.
   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "tree-pass.h"
#include "toplev.h"
#include "real.h"

/* Utility functions for the code transformation.  */
static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree, tree *);
static tree vect_init_vector (tree, tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static bool vect_is_simple_cond (tree, loop_vec_info);
static void update_vuses_to_preheader (tree, struct loop*);
static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
static tree get_initial_def_for_reduction (tree, tree, tree *);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);


/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme prepends
   the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
   VAR_KIND) to NAME if provided, and otherwise uses the prefix alone.  */

static tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
  {
  case vect_simple_var:
    prefix = "vect_";
    break;
  case vect_scalar_var:
    prefix = "stmp_";
    break;
  case vect_pointer_var:
    prefix = "vect_p";
    break;
  default:
    gcc_unreachable ();
  }

  if (name)
    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
  else
    new_vect_var = create_tmp_var (type, prefix);

  return new_vect_var;
}
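
/* Illustration (a sketch; the names are not taken from an actual dump):
   for a scalar variable named "x", vect_simple_var yields a temporary
   whose name starts with "vect_x", vect_scalar_var yields "stmp_x", and
   vect_pointer_var yields "vect_px"; when NAME is NULL the bare prefix
   is used.  */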


/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If NEW_STMT_LIST is not NULL_TREE after return, then the caller must
      insert these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type, addr_expr2;

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      tree step;

      /* For an interleaved access, STEP is divided by the size of the
         interleaving group.  */
      if (DR_GROUP_SIZE (stmt_info))
        step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
                            build_int_cst (TREE_TYPE (offset),
                                           DR_GROUP_SIZE (stmt_info)));
      else
        step = DR_STEP (dr);

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
                                 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset  */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
                           base_offset);

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                      get_name (base_name));
  add_referenced_var (addr_expr2);
  vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return vec_stmt;
}
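
/* Illustration (a hypothetical sketch): for a data-ref 'a[i]' with
   DR_BASE_ADDRESS = &a and DR_OFFSET = DR_INIT = 0, and with no OFFSET
   argument, the stmts appended to NEW_STMT_LIST compute roughly:

       base_off.0 = 0;
       vect_pa.1 = (vectype *) (&a + base_off.0);

   and vect_pa.1 is returned.  When OFFSET is supplied, it is first
   multiplied by DR_STEP and folded into base_off.0.  */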


/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_MODIFY_STMT <name, data-ref> or
         GIMPLE_MODIFY_STMT <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicates whether vp is to be updated in the loop, or remain
        pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Return the pointer.  */

static tree
vect_create_data_ref_ptr (tree stmt,
                          block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
                          tree offset, tree *initial_address, tree *ptr_incr,
                          bool only_init, tree type)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/
  if (type)
    vect_ptr_type = build_pointer_type (type);
  else
    vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);

  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    set_symbol_mem_tag (vect_ptr, tag);

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop:  **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
                 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
                 NULL_TREE, loop, &incr_bsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
                     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
}


/* Function bump_vector_ptr.

   Increment a pointer (to a vector type) by vector-size.  Connect the new
   increment stmt to the existing def-use update-chain of the pointer.

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + vector_size
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
              The increment amount across iterations is also expected to be
              vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */

static tree
bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
                 tree stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vptr_type = TREE_TYPE (dataref_ptr);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
  tree incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  incr_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, ptr_var,
                      build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
  vect_finish_stmt_generation (stmt, incr_stmt, bsi);

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
  merge_alias_info (new_dataref_ptr, dataref_ptr);

  return new_dataref_ptr;
}


/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE (or of the scalar type of
   SCALAR_DEST, if VECTYPE is NULL).  */

static tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}


/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR.  Return the DEF of INIT_STMT.  It will
   be used in the vectorization of STMT.  */

static tree
vect_init_vector (tree stmt, tree vector_var, tree vector_type)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree new_var;
  tree init_stmt;
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
  add_referenced_var (new_var);

  init_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
    }

  vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
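
/* Illustration (a sketch): initializing a V4SI variable with the constant
   vector {3,3,3,3} places in the loop preheader something like:

       vect_cst_.0 = { 3, 3, 3, 3 };

   and returns the SSA name vect_cst_.0 for use inside the loop.  */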


/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will
   be used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector
   def needs to be introduced.  */

static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt = ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        vec_cst = build_vector (vector_type, t);

        return vect_init_vector (stmt, vec_cst, vector_type);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

        /* FIXME: use build_constructor directly.  */
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        vec_inv = build_constructor_from_list (vector_type, t);
        return vect_init_vector (stmt, vec_inv, vector_type);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
        if (scalar_def)
          *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction.  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop.  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
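
/* Illustration (a sketch): when vectorizing 'z = x + 3' with V4SI, the
   operand '3' goes through case 1 above and yields a vector {3,3,3,3}
   initialized in the loop preheader, while the operand 'x' (defined by a
   stmt S1 inside the loop) goes through case 3 and simply returns the lhs
   of the vectorized form of S1.  */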


/* Function vect_get_vec_def_for_stmt_copy.

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0     VS1.1
                        VS1.1: vx.1 = memref1     VS1.2
                        VS1.2: vx.2 = memref2     VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ... VSnew.1
                        VSnew.1: vz1 = vx.1 + ... VSnew.2
                        VSnew.2: vz2 = vx.2 + ... VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

static tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  tree vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  if (dt == vect_invariant_def || dt == vect_constant_def)
    /* Do nothing; can reuse the same def.  */
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);

  return vec_oprnd;
}


/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt,
                             block_stmt_iterator *bsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  set_stmt_info (get_stmt_ann (vec_stmt),
                 new_stmt_vec_info (vec_stmt, loop_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}


#define ADJUST_IN_EPILOG 1

/* Function get_initial_def_for_reduction.

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable.

   Output:
   SCALAR_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for "ADJUST_IN_EPILOG" - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
   TODO: Use some cost-model to estimate which scheme is more profitable.  */

static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  int nelements;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;
  tree vector_type;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case PLUS_EXPR:
      if (INTEGRAL_TYPE_P (type))
        def = build_int_cst (type, 0);
      else
        def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
      /* All the 'nunits' elements are set to 0.  The final result will be
         adjusted by 'init_val' at the loop epilog.  */
      nelements = nunits;
      need_epilog_adjust = true;
#else
      /* 'nunits - 1' elements are set to 0; the last element is set to
         'init_val'.  No further adjustments at the epilog are needed.  */
      nelements = nunits - 1;
      need_epilog_adjust = false;
#endif
      break;

    case MIN_EXPR:
    case MAX_EXPR:
      def = init_val;
      nelements = nunits;
      need_epilog_adjust = false;
      break;

    default:
      gcc_unreachable ();
    }

  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vector_type, t);
  else
    vec = build_constructor_from_list (vector_type, t);

  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  return vect_init_vector (stmt, vec, vector_type);
}


/* Function vect_create_epilog_for_reduction.

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */

static void
vect_create_epilog_for_reduction (tree vect_def, tree stmt,
                                  enum tree_code reduc_code, tree reduction_phi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  tree new_phi;
  block_stmt_iterator exit_bsi;
  tree vec_dest;
  tree new_temp;
  tree new_name;
  tree epilog_stmt;
  tree new_scalar_dest, exit_phi;
  tree bitsize, bitpos, bytesize;
  enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
  tree scalar_initial_def;
  tree vec_initial_def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result;
  tree reduction_op;
  tree orig_stmt;
  tree use_stmt;
  tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
  int op_type;

  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
  reduction_op = TREE_OPERAND (operation, op_type - 1);
  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* 1.1 set the loop-entry arg of the reduction-phi:  */
  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop, that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &scalar_initial_def);
  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));

  /* 1.2 set the loop-latch arg for the reduction-phi:  */
  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform reduction: created def-use cycle:");
      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
    }


  /*** 2. Create epilog code.
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.  ***/

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
         v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
  SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
  exit_bsi = bsi_start (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }
  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
  scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);
  bytesize = TYPE_SIZE_UNIT (scalar_type);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code < NUM_TREE_CODES)
    {
      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
                            build1 (reduc_code, vectype,
                                    PHI_RESULT (new_phi)));
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = 0;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype);
          if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
               for (offset = VS/2; offset >= element_size; offset/=2)
                 {
                   Create:  va' = vec_shift <va, offset>
                   Create:  va = vop <va, va'>
                 }  */
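          /* A worked illustration (a sketch; the lane placement assumes a
             little-endian target, where the final result lands in element 0 -
             see the BYTES_BIG_ENDIAN handling in the extraction code below).
             For a V4SI vector of partial sums [a,b,c,d] and PLUS_EXPR,
             vec_size_in_bits = 128 and element_bitsize = 32, so the loop
             below runs twice:

                 va' = vec_shift <va, 64>    va' = [c,d,?,?]
                 va  = va' + va              va  = [a+c,b+d,?,?]
                 va' = vec_shift <va, 32>    va' = [b+d,?,?,?]
                 va  = va' + va              va  = [a+b+c+d,?,?,?]

             The '?' lanes hold values that are never read; only one lane of
             the final vector is extracted in step 2.  */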

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    vec_dest,
                                    build2 (shift_code, vectype,
                                            new_temp, bitpos));
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    vec_dest,
                                    build2 (code, vectype,
                                            new_name, new_temp));
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
               s = extract_field <v_out2, 0>
               for (offset = element_size;
                    offset < vector_size;
                    offset += element_size;)
                 {
                   Create:  s' = extract_field <v_out2, offset>
                   Create:  s = op <s, s'>
                 }  */

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
          epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
          bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                    new_scalar_dest, rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);

              epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                                new_scalar_dest,
                                build2 (code, scalar_type, new_name, new_temp));
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
              bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
            }

          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                            new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'scalar_initial_def' is NULL_TREE).

         Create:
         s_out4 = scalar_expr <s_out3, scalar_initial_def>  */

  if (scalar_initial_def)
    {
      epilog_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
                      new_scalar_dest,
                      build2 (code, scalar_type, new_temp, scalar_initial_def));
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
    }

  /* 2.6 Replace uses of s_out0 with uses of s_out3 (or s_out4, if an
         adjustment was created above).  */

  /* Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  exit_phi = NULL;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          break;
        }
    }
  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (exit_phi);
  /* Replace the uses:  */
  orig_name = PHI_RESULT (exit_phi);
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
      SET_USE (use_p, new_temp);
}


/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different from the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different from the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates the actual level of parallelism (V8HI in the example), so that
   the right vectorization factor would be derived.  This vectype corresponds
   to the type of arguments to the reduction stmt, and should *NOT* be used
   to create the vectorized stmt.  The right vectype for the vectorized stmt
   is obtained from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */

bool
vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree operation;
  enum tree_code code, orig_code, epilog_reduc_code = 0;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def, def_stmt;
  enum vect_def_type dt;
  tree new_phi;
  tree scalar_type;
  bool is_simple_use;
  tree orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  stmt_vec_info prev_stmt_info;
  tree reduc_def;
  tree new_stmt = NULL_TREE;
  int j;

  gcc_assert (ncopies >= 1);

  /* 1. Is vectorizable reduction?  */

  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  if (!STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  operation = GIMPLE_STMT_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != binary_op && op_type != ternary_op)
    return false;
  scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
  scalar_type = TREE_TYPE (scalar_dest);

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      op = TREE_OPERAND (operation, i);
      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
      gcc_assert (is_simple_use);
      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def
                  || dt == vect_constant_def);
    }

  op = TREE_OPERAND (operation, i);
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def);
  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
    return false;

  /* 4. Supportable by target?  */

  /* 4.1. check support for the operation in the loop  */
  optab = optab_for_tree_code (code, vectype);
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
         < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different from the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; the original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e, in the example
             above we want to use 'widen_sum' in the loop, but 'plus' in the
             epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */

  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* Regular reduction: the same vectype and tree-code as used for the
         vector code inside the loop can also be used for the epilog code.  */
      orig_code = code;
    }

  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    return false;
  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
  if (!reduc_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no optab for reduction.");
      epilog_reduc_code = NUM_TREE_CODES;
    }
  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc op not supported by target.");
      epilog_reduc_code = NUM_TREE_CODES;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* Create the destination vector.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Create the reduction-phi that defines the reduction-operand.  */
  new_phi = create_phi_node (vec_dest, loop->header);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
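  /* For example (a sketch): with VF = 8 and a V4SI vectype (nunits = 4),
     ncopies = 2, so the loop below emits two vector stmts.  The j=1 copy
     takes its reduction operand from the lhs of the j=0 copy, and the two
     copies are chained through STMT_VINFO_RELATED_STMT.  */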

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          op = TREE_OPERAND (operation, 0);
          loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
          if (op_type == ternary_op)
            {
              op = TREE_OPERAND (operation, 1);
              loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
            }

          /* Get the vector def for the reduction variable from the phi
             node.  */
          reduc_def = PHI_RESULT (new_phi);
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          /* Get the vector def for the reduction variable from the vectorized
             reduction operation generated in the previous iteration (j-1).  */
          reduc_def = GIMPLE_STMT_OPERAND (new_stmt, 0);
        }

      /* Arguments are ready.  Create the new vector stmt.  */

      if (op_type == binary_op)
        expr = build2 (code, vectype, loop_vec_def0, reduc_def);
      else
        expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                       reduc_def);
      new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
  return true;
}
1575
1576 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1577 true if the target has a vectorized version of the function,
1578 or false if the function cannot be vectorized. */
1579
1580 bool
1581 vectorizable_function (tree call, tree vectype)
1582 {
1583 tree fndecl = get_callee_fndecl (call);
1584
1585 /* We only handle functions that do not read or clobber memory -- i.e.
1586 const or novops ones. */
1587 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1588 return false;
1589
1590 if (!fndecl
1591 || TREE_CODE (fndecl) != FUNCTION_DECL
1592 || !DECL_BUILT_IN (fndecl))
1593 return false;
1594
1595 if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
1596 return true;
1597
1598 return false;
1599 }
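
/* For illustration (a sketch, not part of the vectorizer itself): a loop
   such as

     for (i = 0; i < n; i++)
       a[i] = __builtin_sqrtf (b[i]);

   passes the check above only if the target hook maps BUILT_IN_SQRTF to a
   vector builtin for the given VECTYPE (e.g. a hypothetical "vsqrtf" that
   operates on V4SF).  The call must also be const or novops, since the
   vectorized form cannot preserve memory side effects.  */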
1600
1601 /* Returns an expression that performs a call to the vectorized version
1602 of FNDECL in type VECTYPE, with the arguments given by ARGS. */
1605
1606 static tree
1607 build_vectorized_function_call (tree fndecl,
1608 tree vectype, tree args)
1609 {
1610 tree vfndecl;
1611 enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
1612
1613 /* The target specific builtin should be available. */
1614 vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
1615 gcc_assert (vfndecl != NULL_TREE);
1616
1617 return build_function_call_expr (vfndecl, args);
1618 }
1619
1620 /* Function vectorizable_call.
1621
1622 Check if STMT performs a function call that can be vectorized.
1623 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1624 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1625 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1626
1627 bool
1628 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1629 {
1630 tree vec_dest;
1631 tree scalar_dest;
1632 tree operation;
1633 tree op, args, type;
1634 tree vec_oprnd, vargs, *pvargs_end;
1635 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1636 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1638 tree fndecl, rhs, new_temp, def, def_stmt;
1639 enum vect_def_type dt;
1640
1641 /* Is STMT a vectorizable call? */
1642 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1643 return false;
1644
1645 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
1646 return false;
1647
1648 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1649 if (TREE_CODE (operation) != CALL_EXPR)
1650 return false;
1651
1652 /* For now, we only vectorize functions if a target specific builtin
1653 is available. TODO -- in some cases, it might be profitable to
1654 insert the calls for pieces of the vector, in order to be able
1655 to vectorize other operations in the loop. */
1656 if (!vectorizable_function (operation, vectype))
1657 {
1658 if (vect_print_dump_info (REPORT_DETAILS))
1659 fprintf (vect_dump, "function is not vectorizable.");
1660
1661 return false;
1662 }
1663 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
1664
1665 for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
1666 {
1667 op = TREE_VALUE (args);
1668
1669 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1670 {
1671 if (vect_print_dump_info (REPORT_DETAILS))
1672 fprintf (vect_dump, "use not simple.");
1673 return false;
1674 }
1675 }
1676
1677 if (!vec_stmt) /* transformation not required. */
1678 {
1679 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1680 return true;
1681 }
1682
1683 /** Transform. **/
1684
1685 if (vect_print_dump_info (REPORT_DETAILS))
1686 fprintf (vect_dump, "transform call.");
1687
1688 /* Handle def. */
1689 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1690 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1691
1692 /* Handle uses. */
1693 vargs = NULL_TREE;
1694 pvargs_end = &vargs;
1695 for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
1696 {
1697 op = TREE_VALUE (args);
1698 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1699
1700 *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
1701 pvargs_end = &TREE_CHAIN (*pvargs_end);
1702 }
1703
1704 fndecl = get_callee_fndecl (operation);
1705 rhs = build_vectorized_function_call (fndecl, vectype, vargs);
1706 *vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs);
1707 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1708 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
1709
1710 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1711
1712 /* The call in STMT might prevent it from being removed in DCE. We,
1713 however, cannot remove it here, due to the way the ssa name it defines
1714 is mapped to the new definition. So just replace the rhs of the
1715 statement with something harmless. */
1716 type = TREE_TYPE (scalar_dest);
1717 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
1718
1719 return true;
1720 }
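
/* For illustration (hypothetical SSA names): a vectorized call leaves the
   scalar statement in place with a harmless rhs, e.g.

     before:  x_1 = sqrtf (b_2);    (vectorized as  vx_5 = vsqrtf (vb_4);)
     after:   x_1 = 0.0;            (now dead; removed later by DCE)

   Replacing the rhs instead of removing the statement keeps the mapping
   from x_1 to its vectorized definition intact.  */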
1721
1722
1723 /* Function vectorizable_assignment.
1724
1725 Check if STMT performs an assignment (copy) that can be vectorized.
1726 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1727 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1728 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1729
1730 bool
1731 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1732 {
1733 tree vec_dest;
1734 tree scalar_dest;
1735 tree op;
1736 tree vec_oprnd;
1737 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1738 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1739 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1740 tree new_temp;
1741 tree def, def_stmt;
1742 enum vect_def_type dt;
1743 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1744 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1745
1746 gcc_assert (ncopies >= 1);
1747 if (ncopies > 1)
1748 return false; /* FORNOW */
1749
1750 /* Is vectorizable assignment? */
1751 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1752 return false;
1753
1754 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1755
1756 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1757 return false;
1758
1759 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1760 if (TREE_CODE (scalar_dest) != SSA_NAME)
1761 return false;
1762
1763 op = GIMPLE_STMT_OPERAND (stmt, 1);
1764 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1765 {
1766 if (vect_print_dump_info (REPORT_DETAILS))
1767 fprintf (vect_dump, "use not simple.");
1768 return false;
1769 }
1770
1771 if (!vec_stmt) /* transformation not required. */
1772 {
1773 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1774 return true;
1775 }
1776
1777 /** Transform. **/
1778 if (vect_print_dump_info (REPORT_DETAILS))
1779 fprintf (vect_dump, "transform assignment.");
1780
1781 /* Handle def. */
1782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1783
1784 /* Handle use. */
1785 op = GIMPLE_STMT_OPERAND (stmt, 1);
1786 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1787
1788 /* Arguments are ready. Create the new vector stmt. */
1789 *vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, vec_oprnd);
1790 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1791 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
1792 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1793
1794 return true;
1795 }
1796
1797
1798 /* Function vect_min_worthwhile_factor.
1799
1800 For a loop where we could vectorize the operation indicated by CODE,
1801 return the minimum vectorization factor that makes it worthwhile
1802 to use generic vectors. */
1803 static int
1804 vect_min_worthwhile_factor (enum tree_code code)
1805 {
1806 switch (code)
1807 {
1808 case PLUS_EXPR:
1809 case MINUS_EXPR:
1810 case NEGATE_EXPR:
1811 return 4;
1812
1813 case BIT_AND_EXPR:
1814 case BIT_IOR_EXPR:
1815 case BIT_XOR_EXPR:
1816 case BIT_NOT_EXPR:
1817 return 2;
1818
1819 default:
1820 return INT_MAX;
1821 }
1822 }
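
/* For illustration: these thresholds are heuristics for emulating vector
   operations in scalar word-mode registers.  E.g., for

     for (i = 0; i < n; i++)
       a[i] = b[i] & c[i];

   a vectorization factor of 2 already satisfies
   vect_min_worthwhile_factor (BIT_AND_EXPR), since bitwise operations
   need no fixup across element boundaries within a word, whereas
   PLUS_EXPR does and therefore requires a factor of at least 4 to be
   worthwhile.  */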
1823
1824
1825 /* Function vectorizable_operation.
1826
1827 Check if STMT performs a binary or unary operation that can be vectorized.
1828 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1829 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1831
1832 bool
1833 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1834 {
1835 tree vec_dest;
1836 tree scalar_dest;
1837 tree operation;
1838 tree op0, op1 = NULL;
1839 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1841 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1843 enum tree_code code;
1844 enum machine_mode vec_mode;
1845 tree new_temp;
1846 int op_type;
1847 optab optab;
1848 int icode;
1849 enum machine_mode optab_op2_mode;
1850 tree def, def_stmt;
1851 enum vect_def_type dt0, dt1;
1852 tree new_stmt;
1853 stmt_vec_info prev_stmt_info;
1854 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1855 int nunits_out;
1856 tree vectype_out;
1857 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1858 int j;
1859
1860 gcc_assert (ncopies >= 1);
1861
1862 /* Is STMT a vectorizable binary/unary operation? */
1863 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1864 return false;
1865
1866 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1867
1868 if (STMT_VINFO_LIVE_P (stmt_info))
1869 {
1870 /* FORNOW: not yet supported. */
1871 if (vect_print_dump_info (REPORT_DETAILS))
1872 fprintf (vect_dump, "value used after loop.");
1873 return false;
1874 }
1875
1876 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1877 return false;
1878
1879 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
1880 return false;
1881
1882 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
1883 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1884 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1885 if (nunits_out != nunits_in)
1886 return false;
1887
1888 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1889 code = TREE_CODE (operation);
1890 optab = optab_for_tree_code (code, vectype);
1891
1892 /* Support only unary or binary operations. */
1893 op_type = TREE_CODE_LENGTH (code);
1894 if (op_type != unary_op && op_type != binary_op)
1895 {
1896 if (vect_print_dump_info (REPORT_DETAILS))
1897 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1898 return false;
1899 }
1900
1901 op0 = TREE_OPERAND (operation, 0);
1902 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
1903 {
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 fprintf (vect_dump, "use not simple.");
1906 return false;
1907 }
1908
1909 if (op_type == binary_op)
1910 {
1911 op1 = TREE_OPERAND (operation, 1);
1912 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
1913 {
1914 if (vect_print_dump_info (REPORT_DETAILS))
1915 fprintf (vect_dump, "use not simple.");
1916 return false;
1917 }
1918 }
1919
1920 /* Supportable by target? */
1921 if (!optab)
1922 {
1923 if (vect_print_dump_info (REPORT_DETAILS))
1924 fprintf (vect_dump, "no optab.");
1925 return false;
1926 }
1927 vec_mode = TYPE_MODE (vectype);
1928 icode = (int) optab->handlers[(int) vec_mode].insn_code;
1929 if (icode == CODE_FOR_nothing)
1930 {
1931 if (vect_print_dump_info (REPORT_DETAILS))
1932 fprintf (vect_dump, "op not supported by target.");
1933 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1934 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1935 < vect_min_worthwhile_factor (code))
1936 return false;
1937 if (vect_print_dump_info (REPORT_DETAILS))
1938 fprintf (vect_dump, "proceeding using word mode.");
1939 }
1940
1941 /* Worthwhile without SIMD support? */
1942 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1943 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1944 < vect_min_worthwhile_factor (code))
1945 {
1946 if (vect_print_dump_info (REPORT_DETAILS))
1947 fprintf (vect_dump, "not worthwhile without SIMD support.");
1948 return false;
1949 }
1950
1951 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1952 {
1953 /* FORNOW: not yet supported. */
1954 if (!VECTOR_MODE_P (vec_mode))
1955 return false;
1956
1957 /* Invariant argument is needed for a vector shift
1958 by a scalar shift operand. */
1959 optab_op2_mode = insn_data[icode].operand[2].mode;
1960 if (! (VECTOR_MODE_P (optab_op2_mode)
1961 || dt1 == vect_constant_def
1962 || dt1 == vect_invariant_def))
1963 {
1964 if (vect_print_dump_info (REPORT_DETAILS))
1965 fprintf (vect_dump, "operand mode requires invariant argument.");
1966 return false;
1967 }
1968 }
1969
1970 if (!vec_stmt) /* transformation not required. */
1971 {
1972 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1973 return true;
1974 }
1975
1976 /** Transform. **/
1977
1978 if (vect_print_dump_info (REPORT_DETAILS))
1979 fprintf (vect_dump, "transform binary/unary operation.");
1980
1981 /* Handle def. */
1982 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1983
1984 /* In case the vectorization factor (VF) is bigger than the number
1985 of elements that we can fit in a vectype (nunits), we have to generate
1986 more than one vector stmt - i.e - we need to "unroll" the
1987 vector stmt by a factor VF/nunits. In doing so, we record a pointer
1988 from one copy of the vector stmt to the next, in the field
1989 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
1990 stages to find the correct vector defs to be used when vectorizing
1991 stmts that use the defs of the current stmt. The example below illustrates
1992 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
1993 4 vectorized stmts):
1994
1995 before vectorization:
1996 RELATED_STMT VEC_STMT
1997 S1: x = memref - -
1998 S2: z = x + 1 - -
1999
2000 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2001 there):
2002 RELATED_STMT VEC_STMT
2003 VS1_0: vx0 = memref0 VS1_1 -
2004 VS1_1: vx1 = memref1 VS1_2 -
2005 VS1_2: vx2 = memref2 VS1_3 -
2006 VS1_3: vx3 = memref3 - -
2007 S1: x = load - VS1_0
2008 S2: z = x + 1 - -
2009
2010 step2: vectorize stmt S2 (done here):
2011 To vectorize stmt S2 we first need to find the relevant vector
2012 def for the first operand 'x'. This is, as usual, obtained from
2013 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2014 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2015 relevant vector def 'vx0'. Having found 'vx0' we can generate
2016 the vector stmt VS2_0, and as usual, record it in the
2017 STMT_VINFO_VEC_STMT of stmt S2.
2018 When creating the second copy (VS2_1), we obtain the relevant vector
2019 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2020 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2021 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2022 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2023 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2024 chain of stmts and pointers:
2025 RELATED_STMT VEC_STMT
2026 VS1_0: vx0 = memref0 VS1_1 -
2027 VS1_1: vx1 = memref1 VS1_2 -
2028 VS1_2: vx2 = memref2 VS1_3 -
2029 VS1_3: vx3 = memref3 - -
2030 S1: x = load - VS1_0
2031 VS2_0: vz0 = vx0 + v1 VS2_1 -
2032 VS2_1: vz1 = vx1 + v1 VS2_2 -
2033 VS2_2: vz2 = vx2 + v1 VS2_3 -
2034 VS2_3: vz3 = vx3 + v1 - -
2035 S2: z = x + 1 - VS2_0 */
2036
2037 prev_stmt_info = NULL;
2038 for (j = 0; j < ncopies; j++)
2039 {
2040 /* Handle uses. */
2041 if (j == 0)
2042 {
2043 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2044 if (op_type == binary_op)
2045 {
2046 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
2047 {
2048 /* Vector shl and shr insn patterns can be defined with
2049 scalar operand 2 (shift operand). In this case, use
2050 constant or loop invariant op1 directly, without
2051 extending it to vector mode first. */
2052 optab_op2_mode = insn_data[icode].operand[2].mode;
2053 if (!VECTOR_MODE_P (optab_op2_mode))
2054 {
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "operand 1 using scalar mode.");
2057 vec_oprnd1 = op1;
2058 }
2059 }
2060 if (!vec_oprnd1)
2061 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2062 }
2063 }
2064 else
2065 {
2066 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2067 if (op_type == binary_op)
2068 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2069 }
2070
2071 /* Arguments are ready. Create the new vector stmt. */
2072
2073 if (op_type == binary_op)
2074 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
2075 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2076 else
2077 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
2078 build1 (code, vectype, vec_oprnd0));
2079 new_temp = make_ssa_name (vec_dest, new_stmt);
2080 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2081 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2082
2083 if (j == 0)
2084 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2085 else
2086 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2087 prev_stmt_info = vinfo_for_stmt (new_stmt);
2088 }
2089
2090 return true;
2091 }
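
/* For illustration (hypothetical SSA names): with VF = 8 and V4SI
   (nunits = 4), ncopies = 2, so the loop above emits two vector statements
   for one scalar statement, chained through STMT_VINFO_RELATED_STMT:

     S:  z = x + y        VS_0:  vz0 = vx0 + vy0     RELATED_STMT: VS_1
                          VS_1:  vz1 = vx1 + vy1     RELATED_STMT: -

   A shift like z = x << 3 takes the scalar-shift-operand path above: the
   constant 3 is used directly as operand 1 when the target's shift pattern
   expects a scalar shift amount.  */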
2092
2093
2094 /* Function vectorizable_type_demotion
2095
2096 Check if STMT performs a binary or unary operation that involves
2097 type demotion, and if it can be vectorized.
2098 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2099 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2100 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2101
2102 bool
2103 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
2104 tree *vec_stmt)
2105 {
2106 tree vec_dest;
2107 tree scalar_dest;
2108 tree operation;
2109 tree op0;
2110 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
2111 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2113 enum tree_code code;
2114 tree new_temp;
2115 tree def, def_stmt;
2116 enum vect_def_type dt0;
2117 tree new_stmt;
2118 stmt_vec_info prev_stmt_info;
2119 int nunits_in;
2120 int nunits_out;
2121 tree vectype_out;
2122 int ncopies;
2123 int j;
2124 tree expr;
2125 tree vectype_in;
2126 tree scalar_type;
2127 optab optab;
2128 enum machine_mode vec_mode;
2129
2130 /* Is STMT a vectorizable type-demotion operation? */
2131
2132 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2133 return false;
2134
2135 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2136
2137 if (STMT_VINFO_LIVE_P (stmt_info))
2138 {
2139 /* FORNOW: not yet supported. */
2140 if (vect_print_dump_info (REPORT_DETAILS))
2141 fprintf (vect_dump, "value used after loop.");
2142 return false;
2143 }
2144
2145 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2146 return false;
2147
2148 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2149 return false;
2150
2151 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2152 code = TREE_CODE (operation);
2153 if (code != NOP_EXPR && code != CONVERT_EXPR)
2154 return false;
2155
2156 op0 = TREE_OPERAND (operation, 0);
2157 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2158 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2159
2160 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2161 scalar_type = TREE_TYPE (scalar_dest);
2162 vectype_out = get_vectype_for_scalar_type (scalar_type);
2163 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2164 if (nunits_in != nunits_out / 2) /* FORNOW */
2165 return false;
2166
2167 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2168 gcc_assert (ncopies >= 1);
2169
2170 /* Check the operands of the operation. */
2171 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2172 {
2173 if (vect_print_dump_info (REPORT_DETAILS))
2174 fprintf (vect_dump, "use not simple.");
2175 return false;
2176 }
2177
2178 /* Supportable by target? */
2179 code = VEC_PACK_MOD_EXPR;
2180 optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
2181 if (!optab)
2182 return false;
2183
2184 vec_mode = TYPE_MODE (vectype_in);
2185 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2186 return false;
2187
2188 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2189
2190 if (!vec_stmt) /* transformation not required. */
2191 {
2192 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2193 return true;
2194 }
2195
2196 /** Transform. **/
2197
2198 if (vect_print_dump_info (REPORT_DETAILS))
2199 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2200 ncopies);
2201
2202 /* Handle def. */
2203 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2204
2205 /* In case the vectorization factor (VF) is bigger than the number
2206 of elements that we can fit in a vectype (nunits), we have to generate
2207 more than one vector stmt - i.e - we need to "unroll" the
2208 vector stmt by a factor VF/nunits. */
2209 prev_stmt_info = NULL;
2210 for (j = 0; j < ncopies; j++)
2211 {
2212 /* Handle uses. */
2213 if (j == 0)
2214 {
2215 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2216 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2217 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
2218 }
2219 else
2220 {
2221 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2222 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2223 }
2224
2225 /* Arguments are ready. Create the new vector stmt. */
2226 expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
2227 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
2228 new_temp = make_ssa_name (vec_dest, new_stmt);
2229 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2230 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2231
2232 if (j == 0)
2233 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2234 else
2235 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2236
2237 prev_stmt_info = vinfo_for_stmt (new_stmt);
2238 }
2239
2240 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2241 return true;
2242 }
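
/* For illustration: demoting int to short with V4SI/V8HI (nunits_in = 4,
   nunits_out = 8) consumes two V4SI defs per V8HI result, which is why the
   loop above fetches two vector operands per copy:

     vshort = VEC_PACK_MOD_EXPR <vint0, vint1>    (4 + 4 -> 8 elements)

   With VF = 8, ncopies = VF / nunits_out = 1, so a single such statement
   is generated.  */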
2243
2244
2245 /* Function vect_gen_widened_results_half
2246
2247 Create a vector stmt whose code, type, number of arguments, and result
2248 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2249 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2250 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2251 needs to be created (DECL is a function-decl of a target-builtin).
2252 STMT is the original scalar stmt that we are vectorizing. */
2253
2254 static tree
2255 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2256 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2257 tree vec_dest, block_stmt_iterator *bsi,
2258 tree stmt)
2259 {
2260 tree vec_params;
2261 tree expr;
2262 tree new_stmt;
2263 tree new_temp;
2264 tree sym;
2265 ssa_op_iter iter;
2266
2267 /* Generate half of the widened result: */
2268 if (code == CALL_EXPR)
2269 {
2270 /* Target specific support */
2271 vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
2272 if (op_type == binary_op)
2273 vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
2274 expr = build_function_call_expr (decl, vec_params);
2275 }
2276 else
2277 {
2278 /* Generic support */
2279 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2280 if (op_type == binary_op)
2281 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2282 else
2283 expr = build1 (code, vectype, vec_oprnd0);
2284 }
2285 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, expr);
2286 new_temp = make_ssa_name (vec_dest, new_stmt);
2287 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2288 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2289
2290 if (code == CALL_EXPR)
2291 {
2292 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2293 {
2294 if (TREE_CODE (sym) == SSA_NAME)
2295 sym = SSA_NAME_VAR (sym);
2296 mark_sym_for_renaming (sym);
2297 }
2298 }
2299
2300 return new_stmt;
2301 }
2302
2303
2304 /* Function vectorizable_type_promotion
2305
2306 Check if STMT performs a binary or unary operation that involves
2307 type promotion, and if it can be vectorized.
2308 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2309 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2310 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2311
2312 bool
2313 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2314 tree *vec_stmt)
2315 {
2316 tree vec_dest;
2317 tree scalar_dest;
2318 tree operation;
2319 tree op0, op1 = NULL;
2320 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
2321 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2322 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2323 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2324 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2325 int op_type;
2326 tree def, def_stmt;
2327 enum vect_def_type dt0, dt1;
2328 tree new_stmt;
2329 stmt_vec_info prev_stmt_info;
2330 int nunits_in;
2331 int nunits_out;
2332 tree vectype_out;
2333 int ncopies;
2334 int j;
2335 tree vectype_in;
2336
2337 /* Is STMT a vectorizable type-promotion operation? */
2338
2339 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2340 return false;
2341
2342 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2343
2344 if (STMT_VINFO_LIVE_P (stmt_info))
2345 {
2346 /* FORNOW: not yet supported. */
2347 if (vect_print_dump_info (REPORT_DETAILS))
2348 fprintf (vect_dump, "value used after loop.");
2349 return false;
2350 }
2351
2352 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2353 return false;
2354
2355 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2356 return false;
2357
2358 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2359 code = TREE_CODE (operation);
2360 if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
2361 return false;
2362
2363 op0 = TREE_OPERAND (operation, 0);
2364 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2365 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2366 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2367 gcc_assert (ncopies >= 1);
2368
2369 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2370 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2371 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2372 if (nunits_out != nunits_in / 2) /* FORNOW */
2373 return false;
2374
2375 /* Check the operands of the operation. */
2376 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2377 {
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "use not simple.");
2380 return false;
2381 }
2382
2383 op_type = TREE_CODE_LENGTH (code);
2384 if (op_type == binary_op)
2385 {
2386 op1 = TREE_OPERAND (operation, 1);
2387 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2388 {
2389 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "use not simple.");
2391 return false;
2392 }
2393 }
2394
2395 /* Supportable by target? */
2396 if (!supportable_widening_operation (code, stmt, vectype_in,
2397 &decl1, &decl2, &code1, &code2))
2398 return false;
2399
2400 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2401
2402 if (!vec_stmt) /* transformation not required. */
2403 {
2404 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2405 return true;
2406 }
2407
2408 /** Transform. **/
2409
2410 if (vect_print_dump_info (REPORT_DETAILS))
2411 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2412 ncopies);
2413
2414 /* Handle def. */
2415 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2416
2417 /* In case the vectorization factor (VF) is bigger than the number
2418 of elements that we can fit in a vectype (nunits), we have to generate
2419 more than one vector stmt - i.e - we need to "unroll" the
2420 vector stmt by a factor VF/nunits. */
2421
2422 prev_stmt_info = NULL;
2423 for (j = 0; j < ncopies; j++)
2424 {
2425 /* Handle uses. */
2426 if (j == 0)
2427 {
2428 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2429 if (op_type == binary_op)
2430 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2431 }
2432 else
2433 {
2434 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2435 if (op_type == binary_op)
2436 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2437 }
2438
2439 /* Arguments are ready. Create the new vector stmt. We are creating
2440 two vector defs because the widened result does not fit in one vector.
2441 The vectorized stmt can be expressed as a call to a target builtin,
2442 or by using a tree-code. */
2443 /* Generate first half of the widened result: */
2444 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2445 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2446 if (j == 0)
2447 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2448 else
2449 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2450 prev_stmt_info = vinfo_for_stmt (new_stmt);
2451
2452 /* Generate second half of the widened result: */
2453 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2454 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2455 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2456 prev_stmt_info = vinfo_for_stmt (new_stmt);
2457
2458 }
2459
2460 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2461 return true;
2462 }
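
/* For illustration: promoting short to int with V8HI/V4SI (nunits_in = 8,
   nunits_out = 4) produces two V4SI results per V8HI operand, hence the two
   halves generated per copy above:

     vint_lo = CODE1 <vshort0, vshort1>    (first half of widened elements)
     vint_hi = CODE2 <vshort0, vshort1>    (second half)

   Whether CODE1/CODE2 are tree codes or calls to target builtins (DECL1/
   DECL2) is decided by supportable_widening_operation.  */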
2463
2464
2465 /* Function vect_strided_store_supported.
2466
2467 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2468 and FALSE otherwise. */
2469
2470 static bool
2471 vect_strided_store_supported (tree vectype)
2472 {
2473 optab interleave_high_optab, interleave_low_optab;
2474 int mode;
2475
2476 mode = (int) TYPE_MODE (vectype);
2477
2478 /* Check that the operation is supported. */
2479 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2480 vectype);
2481 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2482 vectype);
2483 if (!interleave_high_optab || !interleave_low_optab)
2484 {
2485 if (vect_print_dump_info (REPORT_DETAILS))
2486 fprintf (vect_dump, "no optab for interleave.");
2487 return false;
2488 }
2489
2490 if (interleave_high_optab->handlers[(int) mode].insn_code
2491 == CODE_FOR_nothing
2492 || interleave_low_optab->handlers[(int) mode].insn_code
2493 == CODE_FOR_nothing)
2494 {
2495 if (vect_print_dump_info (REPORT_DETAILS))
2496 fprintf (vect_dump, "interleave op not supported by target.");
2497 return false;
2498 }
2499 return true;
2500 }
2501
2502
2503 /* Function vect_permute_store_chain.
2504
2505 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2506 a power of 2, generate interleave_high/low stmts to reorder the data
2507 correctly for the stores. Return the final references for stores in
2508 RESULT_CHAIN.
2509
2510 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2511 The input is 4 vectors each containing 8 elements. We assign a number to each
2512 element, the input sequence is:
2513
2514 1st vec: 0 1 2 3 4 5 6 7
2515 2nd vec: 8 9 10 11 12 13 14 15
2516 3rd vec: 16 17 18 19 20 21 22 23
2517 4th vec: 24 25 26 27 28 29 30 31
2518
2519 The output sequence should be:
2520
2521 1st vec: 0 8 16 24 1 9 17 25
2522 2nd vec: 2 10 18 26 3 11 19 27
2523 3rd vec: 4 12 20 28 5 13 21 29
2524 4th vec: 6 14 22 30 7 15 23 31
2525
2526 i.e., we interleave the contents of the four vectors in their order.
2527
2528 We use interleave_high/low instructions to create such output. The input of
2529 each interleave_high/low operation is two vectors:
2530 1st vec 2nd vec
2531 0 1 2 3 4 5 6 7
2532 the even elements of the result vector are obtained left-to-right from the
2533 high/low elements of the first vector. The odd elements of the result are
2534 obtained left-to-right from the high/low elements of the second vector.
2535 The output of interleave_high will be: 0 4 1 5
2536 and of interleave_low: 2 6 3 7
2537
2538
2539 The permutation is done in log LENGTH stages. In each stage interleave_high
2540 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2541 where the first argument is taken from the first half of DR_CHAIN and the
2542 second argument from its second half.
2543 In our example,
2544
2545 I1: interleave_high (1st vec, 3rd vec)
2546 I2: interleave_low (1st vec, 3rd vec)
2547 I3: interleave_high (2nd vec, 4th vec)
2548 I4: interleave_low (2nd vec, 4th vec)
2549
2550 The output for the first stage is:
2551
2552 I1: 0 16 1 17 2 18 3 19
2553 I2: 4 20 5 21 6 22 7 23
2554 I3: 8 24 9 25 10 26 11 27
2555 I4: 12 28 13 29 14 30 15 31
2556
2557 The output of the second stage, i.e. the final result is:
2558
2559 I1: 0 8 16 24 1 9 17 25
2560 I2: 2 10 18 26 3 11 19 27
2561 I3: 4 12 20 28 5 13 21 29
2562 I4: 6 14 22 30 7 15 23 31. */
2563
2564 static bool
2565 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2566 unsigned int length,
2567 tree stmt,
2568 block_stmt_iterator *bsi,
2569 VEC(tree,heap) **result_chain)
2570 {
2571 tree perm_dest, perm_stmt, vect1, vect2, high, low;
2572 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2573 tree scalar_dest;
2574 int i;
2575 unsigned int j;
2576 VEC(tree,heap) *first, *second;
2577
2578 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2579 first = VEC_alloc (tree, heap, length/2);
2580 second = VEC_alloc (tree, heap, length/2);
2581
2582 /* Check that the operation is supported. */
2583 if (!vect_strided_store_supported (vectype))
2584 return false;
2585
2586 *result_chain = VEC_copy (tree, heap, dr_chain);
2587
2588 for (i = 0; i < exact_log2 (length); i++)
2589 {
2590 for (j = 0; j < length/2; j++)
2591 {
2592 vect1 = VEC_index (tree, dr_chain, j);
2593 vect2 = VEC_index (tree, dr_chain, j+length/2);
2594
2595 /* high = interleave_high (vect1, vect2); */
2596 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2597 add_referenced_var (perm_dest);
2598 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
2599 build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1,
2600 vect2));
2601 high = make_ssa_name (perm_dest, perm_stmt);
2602 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
2603 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
2604 VEC_replace (tree, *result_chain, 2*j, high);
2605
2606 /* low = interleave_low (vect1, vect2); */
2607 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2608 add_referenced_var (perm_dest);
2609 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
2610 build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1,
2611 vect2));
2612 low = make_ssa_name (perm_dest, perm_stmt);
2613 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
2614 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
2615 VEC_replace (tree, *result_chain, 2*j+1, low);
2616 }
2617 dr_chain = VEC_copy (tree, heap, *result_chain);
2618 }
2619 return true;
2620 }
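
/* For illustration: the smallest case, LENGTH = 2 with V4SI, needs a single
   stage (log2 (2) == 1):

     vect1 = { a0, a1, a2, a3 }    vect2 = { b0, b1, b2, b3 }
     high = VEC_INTERLEAVE_HIGH_EXPR <vect1, vect2> = { a0, b0, a1, b1 }
     low  = VEC_INTERLEAVE_LOW_EXPR  <vect1, vect2> = { a2, b2, a3, b3 }

   and RESULT_CHAIN becomes { high, low }.  */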
2621
2622
2623 /* Function vectorizable_store.
2624
2625 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2626 can be vectorized.
2627 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2628 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2629 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2630
2631 bool
2632 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2633 {
2634 tree scalar_dest;
2635 tree data_ref;
2636 tree op;
2637 tree vec_oprnd = NULL_TREE;
2638 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2639 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2640 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2641 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2642 enum machine_mode vec_mode;
2643 tree dummy;
2644 enum dr_alignment_support alignment_support_cheme;
2645 ssa_op_iter iter;
2646 def_operand_p def_p;
2647 tree def, def_stmt;
2648 enum vect_def_type dt;
2649 stmt_vec_info prev_stmt_info = NULL;
2650 tree dataref_ptr = NULL_TREE;
2651 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2652 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2653 int j;
2654 tree next_stmt, first_stmt;
2655 bool strided_store = false;
2656 unsigned int group_size, i;
2657 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2658 gcc_assert (ncopies >= 1);
2659
2660 /* Is vectorizable store? */
2661
2662 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2663 return false;
2664
2665 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2666 if (TREE_CODE (scalar_dest) != ARRAY_REF
2667 && TREE_CODE (scalar_dest) != INDIRECT_REF
2668 && !DR_GROUP_FIRST_DR (stmt_info))
2669 return false;
2670
2671 op = GIMPLE_STMT_OPERAND (stmt, 1);
2672 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2673 {
2674 if (vect_print_dump_info (REPORT_DETAILS))
2675 fprintf (vect_dump, "use not simple.");
2676 return false;
2677 }
2678
2679 vec_mode = TYPE_MODE (vectype);
2680 /* FORNOW. In some cases can vectorize even if data-type not supported
2681 (e.g. - array initialization with 0). */
2682 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2683 return false;
2684
2685 if (!STMT_VINFO_DATA_REF (stmt_info))
2686 return false;
2687
2688 if (DR_GROUP_FIRST_DR (stmt_info))
2689 {
2690 strided_store = true;
2691 if (!vect_strided_store_supported (vectype))
2692 return false;
2693 }
2694
2695 if (!vec_stmt) /* transformation not required. */
2696 {
2697 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2698 return true;
2699 }
2700
2701 /** Transform. **/
2702
2703 if (vect_print_dump_info (REPORT_DETAILS))
2704 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2705
2706 if (strided_store)
2707 {
2708 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2709 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2710 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2711
2712 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2713
2714 /* We vectorize all the stmts of the interleaving group when we
2715 reach the last stmt in the group. */
2716 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2717 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
2718 {
2719 *vec_stmt = NULL_TREE;
2720 return true;
2721 }
2722 }
2723 else
2724 {
2725 first_stmt = stmt;
2726 first_dr = dr;
2727 group_size = 1;
2728 }
2729
2730 dr_chain = VEC_alloc (tree, heap, group_size);
2731 oprnds = VEC_alloc (tree, heap, group_size);
2732
2733 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
2734 gcc_assert (alignment_support_cheme);
2735 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2736
2737 /* In case the vectorization factor (VF) is bigger than the number
2738 of elements that we can fit in a vectype (nunits), we have to generate
2739 more than one vector stmt - i.e - we need to "unroll" the
2740 vector stmt by a factor VF/nunits. For more details see documentation in
2741 vect_get_vec_def_for_stmt_copy. */
2742
2743 /* In case of interleaving (non-unit strided access):
2744
2745 S1: &base + 2 = x2
2746 S2: &base = x0
2747 S3: &base + 1 = x1
2748 S4: &base + 3 = x3
2749
2750 We create vectorized stores starting from the base address (the access of
2751 the first stmt in the chain, S2 in the above example) when the last store stmt
2752 of the chain (S4) is reached:
2753
2754 VS1: &base = vx2
2755 VS2: &base + vec_size*1 = vx0
2756 VS3: &base + vec_size*2 = vx1
2757 VS4: &base + vec_size*3 = vx3
2758
2759 Then permutation statements are generated:
2760
2761 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2762 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2763 ...
2764
2765 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2766 (the order of the data-refs in the output of vect_permute_store_chain
2767 corresponds to the order of scalar stmts in the interleaving chain - see
2768 the documentation of vect_permute_store_chain()).
2769
2770 In case of both multiple types and interleaving, the above vector stores and
2771 permutation stmts are created for every copy. The result vector stmts are
2772 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
2773 STMT_VINFO_RELATED_STMT for the next copies.
2774 */
2775
2776 prev_stmt_info = NULL;
2777 for (j = 0; j < ncopies; j++)
2778 {
2779 tree new_stmt;
2780 tree ptr_incr;
2781
2782 if (j == 0)
2783 {
2784 /* For interleaved stores we collect vectorized defs for all the
2785 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
2786 as an input to vect_permute_store_chain(), and OPRNDS as an input
2787 to vect_get_vec_def_for_stmt_copy() for the next copy.
2788 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2789 OPRNDS are of size 1.
2790 */
2791 next_stmt = first_stmt;
2792 for (i = 0; i < group_size; i++)
2793 {
2794 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
2795 is the exact number of stmts in the chain. Therefore, NEXT_STMT
2796 can't be NULL_TREE. In case that there is no interleaving,
2797 GROUP_SIZE is 1, and only one iteration of the loop will be
2798 executed.
2799 */
2800 gcc_assert (next_stmt);
2801 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
2802 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
2803 VEC_quick_push(tree, dr_chain, vec_oprnd);
2804 VEC_quick_push(tree, oprnds, vec_oprnd);
2805 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2806 }
2807 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
2808 &dummy, &ptr_incr, false,
2809 TREE_TYPE (vec_oprnd));
2810 }
2811 else
2812 {
2813 /* For interleaved stores we created vectorized defs for all the
2814 defs stored in OPRNDS in the previous iteration (previous copy).
2815 DR_CHAIN is then used as an input to vect_permute_store_chain(),
2816 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
2817 next copy.
2818 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
2819 OPRNDS are of size 1.
2820 */
2821 for (i = 0; i < group_size; i++)
2822 {
2823 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
2824 VEC_index (tree, oprnds, i));
2825 VEC_replace(tree, dr_chain, i, vec_oprnd);
2826 VEC_replace(tree, oprnds, i, vec_oprnd);
2827 }
2828 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2829 }
2830
2831 if (strided_store)
2832 {
2833 result_chain = VEC_alloc (tree, heap, group_size);
2834 /* Permute. */
2835 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
2836 &result_chain))
2837 return false;
2838 }
2839
2840 next_stmt = first_stmt;
2841 for (i = 0; i < group_size; i++)
2842 {
2843 /* For strided stores vectorized defs are interleaved in
2844 vect_permute_store_chain(). */
2845 if (strided_store)
2846 vec_oprnd = VEC_index(tree, result_chain, i);
2847
2848 data_ref = build_fold_indirect_ref (dataref_ptr);
2849 /* Arguments are ready. Create the new vector stmt. */
2850 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, data_ref,
2851 vec_oprnd);
2852 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2853
2854 /* Set the VDEFs for the vector pointer. If this virtual def
2855 has a use outside the loop and a loop peel is performed
2856 then the def may be renamed by the peel. Mark it for
2857 renaming so the later use will also be renamed. */
2858 copy_virtual_operands (new_stmt, next_stmt);
2859 if (j == 0)
2860 {
2861 /* The original store is deleted so the same SSA_NAMEs
2862 can be used. */
2863 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
2864 {
2865 SSA_NAME_DEF_STMT (def) = new_stmt;
2866 mark_sym_for_renaming (SSA_NAME_VAR (def));
2867 }
2868
2869 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2870 }
2871 else
2872 {
2873 /* Create new names for all the definitions created by the copy and
2874 add replacement mappings for each new name. */
2875 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
2876 {
2877 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
2878 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
2879 }
2880
2881 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2882 }
2883
2884 prev_stmt_info = vinfo_for_stmt (new_stmt);
2885 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2886 if (!next_stmt)
2887 break;
2888 /* Bump the vector pointer. */
2889 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2890 }
2891 }
2892
2893 return true;
2894 }
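
/* For illustration: an interleaved store group of size 2 arises from a loop
   such as

     for (i = 0; i < n; i++)
       {
         p[2*i]   = x[i];
         p[2*i+1] = y[i];
       }

   Both scalar stores belong to one chain; the whole group is vectorized
   only when the last store of the chain is reached, and
   vect_permute_store_chain interleaves the vectors of x and y values
   before they are stored.  */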
2895
2896
2897 /* Function vect_setup_realignment
2898
2899 This function is called when vectorizing an unaligned load using
2900 the dr_unaligned_software_pipeline scheme.
2901 This function generates the following code at the loop prolog:
2902
2903 p = initial_addr;
2904 msq_init = *(floor(p)); # prolog load
2905 realignment_token = call target_builtin;
2906 loop:
2907 msq = phi (msq_init, ---)
2908
2909 The code above sets up a new (vector) pointer, pointing to the first
2910 location accessed by STMT, and a "floor-aligned" load using that pointer.
2911 It also generates code to compute the "realignment-token" (if the relevant
2912 target hook was defined), and creates a phi-node at the loop-header bb
2913 whose arguments are the result of the prolog-load (created by this
2914 function) and the result of a load that takes place in the loop (to be
2915 created by the caller to this function).
2916 The caller to this function uses the phi-result (msq) to create the
2917 realignment code inside the loop, and sets up the missing phi argument,
2918 as follows:
2919
2920 loop:
2921 msq = phi (msq_init, lsq)
2922 lsq = *(floor(p')); # load in loop
2923 result = realign_load (msq, lsq, realignment_token);
2924
2925 Input:
2926 STMT - (scalar) load stmt to be vectorized. This load accesses
2927 a memory location that may be unaligned.
2928 BSI - place where new code is to be inserted.
2929
2930 Output:
2931 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2932 target hook, if defined.
2933 Return value - the result of the loop-header phi node. */
2934
2935 static tree
2936 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
2937 tree *realignment_token)
2938 {
2939 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2940 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2941 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2942 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2943 edge pe = loop_preheader_edge (loop);
2944 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2945 tree vec_dest;
2946 tree init_addr;
2947 tree inc;
2948 tree ptr;
2949 tree data_ref;
2950 tree new_stmt;
2951 basic_block new_bb;
2952 tree msq_init;
2953 tree new_temp;
2954 tree phi_stmt;
2955 tree msq;
2956
2957 /* 1. Create msq_init = *(floor(p)) in the loop preheader */
2958 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2959 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
2960 NULL_TREE);
2961 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2962 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest, data_ref);
2963 new_temp = make_ssa_name (vec_dest, new_stmt);
2964 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2965 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2966 gcc_assert (!new_bb);
2967 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
2968 copy_virtual_operands (new_stmt, stmt);
2969 update_vuses_to_preheader (new_stmt, loop);
2970
2971 /* 2. Create permutation mask, if required, in loop preheader. */
2972 if (targetm.vectorize.builtin_mask_for_load)
2973 {
2974 tree builtin_decl;
2975 tree params = build_tree_list (NULL_TREE, init_addr);
2976
2977 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2978 new_stmt = build_function_call_expr (builtin_decl, params);
2979 vec_dest = vect_create_destination_var (scalar_dest,
2980 TREE_TYPE (new_stmt));
2981 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
2982 new_stmt);
2983 new_temp = make_ssa_name (vec_dest, new_stmt);
2984 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2985 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2986 gcc_assert (!new_bb);
2987 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
2988
2989 /* The result of the CALL_EXPR to this builtin is determined from
2990 the value of the parameter and no global variables are touched
2991 which makes the builtin a "const" function. Requiring the
2992 builtin to have the "const" attribute makes it unnecessary
2993 to call mark_call_clobbered. */
2994 gcc_assert (TREE_READONLY (builtin_decl));
2995 }
2996
2997 /* 3. Create msq = phi <msq_init, lsq> in loop */
2998 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2999 msq = make_ssa_name (vec_dest, NULL_TREE);
3000 phi_stmt = create_phi_node (msq, loop->header);
3001 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3002 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
3003
3004 return msq;
3005 }
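
/* For illustration (hypothetical addresses): with 16-byte vectors, an
   unaligned access to p = 0x1004 is realigned as

     msq = *(0x1000);                            (floor-aligned prolog load)
     lsq = *(0x1010);                            (aligned load in the loop)
     result = realign_load (msq, lsq, token);    (bytes 0x1004 .. 0x1013)

   where TOKEN, the result of builtin_mask_for_load (if defined), encodes
   the permutation needed to extract the requested bytes.  */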
3006
3007
3008 /* Function vect_strided_load_supported.
3009
3010 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3011 and FALSE otherwise. */
3012
3013 static bool
3014 vect_strided_load_supported (tree vectype)
3015 {
3016 optab perm_even_optab, perm_odd_optab;
3017 int mode;
3018
3019 mode = (int) TYPE_MODE (vectype);
3020
3021 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
3022 if (!perm_even_optab)
3023 {
3024 if (vect_print_dump_info (REPORT_DETAILS))
3025 fprintf (vect_dump, "no optab for perm_even.");
3026 return false;
3027 }
3028
3029 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3030 {
3031 if (vect_print_dump_info (REPORT_DETAILS))
3032 fprintf (vect_dump, "perm_even op not supported by target.");
3033 return false;
3034 }
3035
3036 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
3037 if (!perm_odd_optab)
3038 {
3039 if (vect_print_dump_info (REPORT_DETAILS))
3040 fprintf (vect_dump, "no optab for perm_odd.");
3041 return false;
3042 }
3043
3044 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3045 {
3046 if (vect_print_dump_info (REPORT_DETAILS))
3047 fprintf (vect_dump, "perm_odd op not supported by target.");
3048 return false;
3049 }
3050 return true;
3051 }
3052
3053
3054 /* Function vect_permute_load_chain.
3055
3056 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3057 a power of 2, generate extract_even/odd stmts to reorder the input data
3058 correctly. Return the final references for loads in RESULT_CHAIN.
3059
3060 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3061 The input is 4 vectors each containing 8 elements. We assign a number to each
3062 element, the input sequence is:
3063
3064 1st vec: 0 1 2 3 4 5 6 7
3065 2nd vec: 8 9 10 11 12 13 14 15
3066 3rd vec: 16 17 18 19 20 21 22 23
3067 4th vec: 24 25 26 27 28 29 30 31
3068
3069 The output sequence should be:
3070
3071 1st vec: 0 4 8 12 16 20 24 28
3072 2nd vec: 1 5 9 13 17 21 25 29
3073 3rd vec: 2 6 10 14 18 22 26 30
3074 4th vec: 3 7 11 15 19 23 27 31
3075
3076 i.e., the first output vector should contain the first elements of each
3077 interleaving group, etc.
3078
3079 We use extract_even/odd instructions to create such output. The input of each
3080 extract_even/odd operation is two vectors
3081 1st vec 2nd vec
3082 0 1 2 3 4 5 6 7
3083
3084 and the output is the vector of extracted even/odd elements. The output of
3085 extract_even will be: 0 2 4 6
3086 and of extract_odd: 1 3 5 7
3087
3088
3089 The permutation is done in log LENGTH stages. In each stage extract_even and
3090 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3091 order. In our example,
3092
3093 E1: extract_even (1st vec, 2nd vec)
3094 E2: extract_odd (1st vec, 2nd vec)
3095 E3: extract_even (3rd vec, 4th vec)
3096 E4: extract_odd (3rd vec, 4th vec)
3097
3098 The output for the first stage will be:
3099
3100 E1: 0 2 4 6 8 10 12 14
3101 E2: 1 3 5 7 9 11 13 15
3102 E3: 16 18 20 22 24 26 28 30
3103 E4: 17 19 21 23 25 27 29 31
3104
3105 In order to proceed and create the correct sequence for the next stage (or
3106 for the correct output, if the second stage is the last one, as in our
3107 example), we first put the output of extract_even operation and then the
3108 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3109 The input for the second stage is:
3110
3111 1st vec (E1): 0 2 4 6 8 10 12 14
3112 2nd vec (E3): 16 18 20 22 24 26 28 30
3113 3rd vec (E2): 1 3 5 7 9 11 13 15
3114 4th vec (E4): 17 19 21 23 25 27 29 31
3115
3116 The output of the second stage:
3117
3118 E1: 0 4 8 12 16 20 24 28
3119 E2: 2 6 10 14 18 22 26 30
3120 E3: 1 5 9 13 17 21 25 29
3121 E4: 3 7 11 15 19 23 27 31
3122
3123 And RESULT_CHAIN after reordering:
3124
3125 1st vec (E1): 0 4 8 12 16 20 24 28
3126 2nd vec (E3): 1 5 9 13 17 21 25 29
3127 3rd vec (E2): 2 6 10 14 18 22 26 30
3128 4th vec (E4): 3 7 11 15 19 23 27 31. */
3129
3130 static bool
3131 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3132 unsigned int length,
3133 tree stmt,
3134 block_stmt_iterator *bsi,
3135 VEC(tree,heap) **result_chain)
3136 {
3137 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
3138 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3139 int i;
3140 unsigned int j;
3141
3142 /* Check that the operation is supported. */
3143 if (!vect_strided_load_supported (vectype))
3144 return false;
3145
3146 *result_chain = VEC_copy (tree, heap, dr_chain);
3147 for (i = 0; i < exact_log2 (length); i++)
3148 {
3149 for (j = 0; j < length; j += 2)
3150 {
3151 first_vect = VEC_index (tree, dr_chain, j);
3152 second_vect = VEC_index (tree, dr_chain, j+1);
3153
3154 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3155 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3156 add_referenced_var (perm_dest);
3157
3158 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
3159 build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
3160 first_vect, second_vect));
3161
3162 data_ref = make_ssa_name (perm_dest, perm_stmt);
3163 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
3164 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3165 mark_symbols_for_renaming (perm_stmt);
3166
3167 VEC_replace (tree, *result_chain, j/2, data_ref);
3168
3169 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3170 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3171 add_referenced_var (perm_dest);
3172
3173 perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
3174 build2 (VEC_EXTRACT_ODD_EXPR, vectype,
3175 first_vect, second_vect));
3176 data_ref = make_ssa_name (perm_dest, perm_stmt);
3177 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
3178 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3179 mark_symbols_for_renaming (perm_stmt);
3180
3181 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3182 }
3183 dr_chain = VEC_copy (tree, heap, *result_chain);
3184 }
3185 return true;
3186 }
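
/* For illustration: the smallest case, LENGTH = 2 with V4SI, needs a single
   stage:

     first  = { x0, y0, x1, y1 }    second = { x2, y2, x3, y3 }
     even = VEC_EXTRACT_EVEN_EXPR <first, second> = { x0, x1, x2, x3 }
     odd  = VEC_EXTRACT_ODD_EXPR  <first, second> = { y0, y1, y2, y3 }

   and RESULT_CHAIN becomes { even, odd }.  */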
3187
3188
3189 /* Function vect_transform_strided_load.
3190
3191 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3192 to perform their permutation and assign the resulting vectorized statements
3193 to the scalar statements. */
3195
3196 static bool
3197 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
3198 block_stmt_iterator *bsi)
3199 {
3200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3201 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3202 tree next_stmt, new_stmt;
3203 VEC(tree,heap) *result_chain = NULL;
3204 unsigned int i, gap_count;
3205 tree tmp_data_ref;
3206
3207 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3208 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
3209 vectors that are ready for vector computation. */
3210 result_chain = VEC_alloc (tree, heap, size);
3211 /* Permute. */
3212 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
3213 return false;
3214
3215 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3216 Since we scan the chain starting from its first node, their order
3217 corresponds to the order of data-refs in RESULT_CHAIN. */
3218 next_stmt = first_stmt;
3219 gap_count = 1;
3220 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
3221 {
3222 if (!next_stmt)
3223 break;
3224
3225 /* Skip the gaps. Loads created for the gaps will be removed by dead
3226 code elimination pass later.
3227 DR_GROUP_GAP is the number of steps in elements from the previous
3228 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3229 correspond to the gaps.
3230 */
3231 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3232 {
3233 gap_count++;
3234 continue;
3235 }
3236
3237 while (next_stmt)
3238 {
3239 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3240 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3241 copies, and we put the new vector statement in the first available
3242 RELATED_STMT. */
3243 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3244 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3245 else
3246 {
3247 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3248 tree rel_stmt = STMT_VINFO_RELATED_STMT (
3249 vinfo_for_stmt (prev_stmt));
3250 while (rel_stmt)
3251 {
3252 prev_stmt = rel_stmt;
3253 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3254 }
3255 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
3256 }
3257 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3258 gap_count = 1;
3259 /* If NEXT_STMT accesses the same DR as the previous statement,
3260 put the same TMP_DATA_REF as its vectorized statement; otherwise
3261 get the next data-ref from RESULT_CHAIN. */
3262 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3263 break;
3264 }
3265 }
3266 return true;
3267 }
3268
3269
3270 /* vectorizable_load.
3271
3272    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
3273 can be vectorized.
3274 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3275 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3276 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3277
3278 bool
3279 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3280 {
3281 tree scalar_dest;
3282 tree vec_dest = NULL;
3283 tree data_ref = NULL;
3284 tree op;
3285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3286 stmt_vec_info prev_stmt_info;
3287 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3288 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3289 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3290 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3291 tree new_temp;
3292 int mode;
3293 tree new_stmt = NULL_TREE;
3294 tree dummy;
3295   enum dr_alignment_support alignment_support_scheme;
3296 tree dataref_ptr = NULL_TREE;
3297 tree ptr_incr;
3298 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3299 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3300 int i, j, group_size;
3301 tree msq = NULL_TREE, lsq;
3302 tree offset = NULL_TREE;
3303 tree realignment_token = NULL_TREE;
3304 tree phi_stmt = NULL_TREE;
3305 VEC(tree,heap) *dr_chain = NULL;
3306 bool strided_load = false;
3307 tree first_stmt;
3308
3309 /* Is vectorizable load? */
3310 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3311 return false;
3312
3313 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
3314
3315 if (STMT_VINFO_LIVE_P (stmt_info))
3316 {
3317 /* FORNOW: not yet supported. */
3318 if (vect_print_dump_info (REPORT_DETAILS))
3319 fprintf (vect_dump, "value used after loop.");
3320 return false;
3321 }
3322
3323 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3324 return false;
3325
3326 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3327 if (TREE_CODE (scalar_dest) != SSA_NAME)
3328 return false;
3329
3330 op = GIMPLE_STMT_OPERAND (stmt, 1);
3331 if (TREE_CODE (op) != ARRAY_REF
3332 && TREE_CODE (op) != INDIRECT_REF
3333 && !DR_GROUP_FIRST_DR (stmt_info))
3334 return false;
3335
3336 if (!STMT_VINFO_DATA_REF (stmt_info))
3337 return false;
3338
3339 mode = (int) TYPE_MODE (vectype);
3340
3341   /* FORNOW. In some cases we can vectorize even if the data-type is not
3342      supported (e.g. - data copies).  */
3343 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
3344 {
3345 if (vect_print_dump_info (REPORT_DETAILS))
3346 fprintf (vect_dump, "Aligned load, but unsupported type.");
3347 return false;
3348 }
3349
3350 /* Check if the load is a part of an interleaving chain. */
3351 if (DR_GROUP_FIRST_DR (stmt_info))
3352 {
3353 strided_load = true;
3354
3355 /* Check if interleaving is supported. */
3356 if (!vect_strided_load_supported (vectype))
3357 return false;
3358 }
3359
3360 if (!vec_stmt) /* transformation not required. */
3361 {
3362 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3363 return true;
3364 }
3365
3366 /** Transform. **/
3367
3368 if (vect_print_dump_info (REPORT_DETAILS))
3369 fprintf (vect_dump, "transform load.");
3370
3371 if (strided_load)
3372 {
3373 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3374 /* Check if the chain of loads is already vectorized. */
3375 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3376 {
3377 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3378 return true;
3379 }
3380 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3381 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3382 dr_chain = VEC_alloc (tree, heap, group_size);
3383 }
3384 else
3385 {
3386 first_stmt = stmt;
3387 first_dr = dr;
3388 group_size = 1;
3389 }
3390
3391   alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3392   gcc_assert (alignment_support_scheme);
3393
3394
3395 /* In case the vectorization factor (VF) is bigger than the number
3396 of elements that we can fit in a vectype (nunits), we have to generate
3397 more than one vector stmt - i.e - we need to "unroll" the
3398 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3399 from one copy of the vector stmt to the next, in the field
3400 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3401 stages to find the correct vector defs to be used when vectorizing
3402 stmts that use the defs of the current stmt. The example below illustrates
3403 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3404 4 vectorized stmts):
3405
3406 before vectorization:
3407 RELATED_STMT VEC_STMT
3408 S1: x = memref - -
3409 S2: z = x + 1 - -
3410
3411 step 1: vectorize stmt S1:
3412 We first create the vector stmt VS1_0, and, as usual, record a
3413 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3414 Next, we create the vector stmt VS1_1, and record a pointer to
3415 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3416 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3417 stmts and pointers:
3418 RELATED_STMT VEC_STMT
3419 VS1_0: vx0 = memref0 VS1_1 -
3420 VS1_1: vx1 = memref1 VS1_2 -
3421 VS1_2: vx2 = memref2 VS1_3 -
3422 VS1_3: vx3 = memref3 - -
3423 S1: x = load - VS1_0
3424 S2: z = x + 1 - -
3425
3426 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3427 information we recorded in RELATED_STMT field is used to vectorize
3428 stmt S2. */
3429
3430 /* In case of interleaving (non-unit strided access):
3431
3432 S1: x2 = &base + 2
3433 S2: x0 = &base
3434 S3: x1 = &base + 1
3435 S4: x3 = &base + 3
3436
3437 Vectorized loads are created in the order of memory accesses
3438 starting from the access of the first stmt of the chain:
3439
3440 VS1: vx0 = &base
3441 VS2: vx1 = &base + vec_size*1
3442      VS3: vx2 = &base + vec_size*2
3443      VS4: vx3 = &base + vec_size*3
3444
3445 Then permutation statements are generated:
3446
3447 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3448 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3449 ...
3450
3451 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3452 (the order of the data-refs in the output of vect_permute_load_chain
3453 corresponds to the order of scalar stmts in the interleaving chain - see
3454 the documentation of vect_permute_load_chain()).
3455 The generation of permutation stmts and recording them in
3456 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3457
3458 In case of both multiple types and interleaving, the vector loads and
3459 permutation stmts above are created for every copy. The result vector stmts
3460 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3461 STMT_VINFO_RELATED_STMT for the next copies. */
3462
3463 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3464 on a target that supports unaligned accesses (dr_unaligned_supported)
3465 we generate the following code:
3466 p = initial_addr;
3467 indx = 0;
3468 loop {
3469 p = p + indx * vectype_size;
3470 vec_dest = *(p);
3471 indx = indx + 1;
3472 }
3473
3474 Otherwise, the data reference is potentially unaligned on a target that
3475 does not support unaligned accesses (dr_unaligned_software_pipeline) -
3476 then generate the following code, in which the data in each iteration is
3477 obtained by two vector loads, one from the previous iteration, and one
3478 from the current iteration:
3479 p1 = initial_addr;
3480 msq_init = *(floor(p1))
3481 p2 = initial_addr + VS - 1;
3482 realignment_token = call target_builtin;
3483 indx = 0;
3484 loop {
3485 p2 = p2 + indx * vectype_size
3486 lsq = *(floor(p2))
3487 vec_dest = realign_load (msq, lsq, realignment_token)
3488 indx = indx + 1;
3489 msq = lsq;
3490 } */
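   /* A concrete illustration of the software-pipelined scheme above,
      assuming (for the sake of the example) 16-byte vectors and an initial
      address of 0x1007: msq_init loads from floor(0x1007) = 0x1000; in the
      first iteration lsq loads from floor(0x1007 + 15) = 0x1010, and
      realign_load combines the two 16-byte quantities, shifting by the
      misalignment encoded in realignment_token, to yield the 16 bytes
      starting at 0x1007.  Each subsequent iteration reuses the previous lsq
      as msq, so only one vector load is issued per iteration in steady
      state.  */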
3491
3492   if (alignment_support_scheme == dr_unaligned_software_pipeline)
3493 {
3494 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
3495 phi_stmt = SSA_NAME_DEF_STMT (msq);
3496 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3497 }
3498
3499 prev_stmt_info = NULL;
3500 for (j = 0; j < ncopies; j++)
3501 {
3502 /* 1. Create the vector pointer update chain. */
3503 if (j == 0)
3504 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
3505 &ptr_incr, false, NULL_TREE);
3506 else
3507 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3508
3509 for (i = 0; i < group_size; i++)
3510 {
3511 /* 2. Create the vector-load in the loop. */
3512 	  switch (alignment_support_scheme)
3513 {
3514 case dr_aligned:
3515 gcc_assert (aligned_access_p (first_dr));
3516 data_ref = build_fold_indirect_ref (dataref_ptr);
3517 break;
3518 case dr_unaligned_supported:
3519 {
3520 int mis = DR_MISALIGNMENT (first_dr);
3521 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3522
3523 gcc_assert (!aligned_access_p (first_dr));
3524 	      tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3525 data_ref =
3526 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3527 break;
3528 }
3529 case dr_unaligned_software_pipeline:
3530 gcc_assert (!aligned_access_p (first_dr));
3531 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3532 break;
3533 default:
3534 gcc_unreachable ();
3535 }
3536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3537 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3538 data_ref);
3539 new_temp = make_ssa_name (vec_dest, new_stmt);
3540 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3541 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3542 copy_virtual_operands (new_stmt, stmt);
3543 mark_symbols_for_renaming (new_stmt);
3544
3545 /* 3. Handle explicit realignment if necessary/supported. */
3546 	  if (alignment_support_scheme == dr_unaligned_software_pipeline)
3547 {
3548 /* Create in loop:
3549 <vec_dest = realign_load (msq, lsq, realignment_token)> */
3550 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
3551 if (!realignment_token)
3552 realignment_token = dataref_ptr;
3553 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3554 new_stmt =
3555 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
3556 new_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3557 new_stmt);
3558 new_temp = make_ssa_name (vec_dest, new_stmt);
3559 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3560 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3561 if (i == group_size - 1 && j == ncopies - 1)
3562 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
3563 msq = lsq;
3564 }
3565 if (strided_load)
3566 VEC_quick_push (tree, dr_chain, new_temp);
3567 if (i < group_size - 1)
3568 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
3569 }
3570
3571 if (strided_load)
3572 {
3573 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
3574 return false;
3575 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3576 dr_chain = VEC_alloc (tree, heap, group_size);
3577 }
3578 else
3579 {
3580 if (j == 0)
3581 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3582 else
3583 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3584 prev_stmt_info = vinfo_for_stmt (new_stmt);
3585 }
3586 }
3587
3588 return true;
3589 }
3590
3591
3592 /* Function vectorizable_live_operation.
3593
3594 STMT computes a value that is used outside the loop. Check if
3595 it can be supported. */
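/* An illustrative example of the only case handled below: if STMT is
   "t = a + b" where both A and B are loop invariants, T has the same value
   on every iteration, so the scalar stmt can stay in place, unvectorized,
   and the value it computes in the last iteration is the one seen by the
   uses after the loop.  */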
3596
3597 bool
3598 vectorizable_live_operation (tree stmt,
3599 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3600 tree *vec_stmt ATTRIBUTE_UNUSED)
3601 {
3602 tree operation;
3603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3604 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3605 int i;
3606 enum tree_code code;
3607 int op_type;
3608 tree op;
3609 tree def, def_stmt;
3610 enum vect_def_type dt;
3611
3612 if (!STMT_VINFO_LIVE_P (stmt_info))
3613 return false;
3614
3615 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3616 return false;
3617
3618 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3619 return false;
3620
3621 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3622 code = TREE_CODE (operation);
3623
3624 op_type = TREE_CODE_LENGTH (code);
3625
3626   /* FORNOW: support only the case where all the operands are invariant, so
3627      that the scalar operations can remain in place, unvectorized.
3628      The last scalar value that they compute will be the one used.  */
3629
3630 for (i = 0; i < op_type; i++)
3631 {
3632 op = TREE_OPERAND (operation, i);
3633 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3634 {
3635 if (vect_print_dump_info (REPORT_DETAILS))
3636 fprintf (vect_dump, "use not simple.");
3637 return false;
3638 }
3639
3640 if (dt != vect_invariant_def && dt != vect_constant_def)
3641 return false;
3642 }
3643
3644 /* No transformation is required for the cases we currently support. */
3645 return true;
3646 }
3647
3648
3649 /* Function vect_is_simple_cond.
3650
3651 Input:
3652 LOOP - the loop that is being vectorized.
3653 COND - Condition that is checked for simple use.
3654
3655 Returns whether a COND can be vectorized. Checks whether
3656    condition operands are supportable using vect_is_simple_use.  */
3657
3658 static bool
3659 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3660 {
3661 tree lhs, rhs;
3662 tree def;
3663 enum vect_def_type dt;
3664
3665 if (!COMPARISON_CLASS_P (cond))
3666 return false;
3667
3668 lhs = TREE_OPERAND (cond, 0);
3669 rhs = TREE_OPERAND (cond, 1);
3670
3671 if (TREE_CODE (lhs) == SSA_NAME)
3672 {
3673 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3674 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3675 return false;
3676 }
3677 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
3678 return false;
3679
3680 if (TREE_CODE (rhs) == SSA_NAME)
3681 {
3682 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3683 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3684 return false;
3685 }
3686 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
3687 return false;
3688
3689 return true;
3690 }
3691
3692 /* vectorizable_condition.
3693
3694    Check if STMT is a conditional modify expression that can be vectorized.
3695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3696 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3697 at BSI.
3698
3699 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
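/* For example, a scalar stmt of the form
     x = a < b ? c : d
   is vectorized (given suitable vector defs va, vb, vc and vd for its
   operands) as
     vx = VEC_COND_EXPR <va < vb, vc, vd>
   which is exactly the form this function builds below.  */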
3700
3701 bool
3702 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3703 {
3704 tree scalar_dest = NULL_TREE;
3705 tree vec_dest = NULL_TREE;
3706 tree op = NULL_TREE;
3707 tree cond_expr, then_clause, else_clause;
3708 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3709 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3710 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3711 tree vec_compare, vec_cond_expr;
3712 tree new_temp;
3713 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3714 enum machine_mode vec_mode;
3715 tree def;
3716 enum vect_def_type dt;
3717 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3718 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3719
3720 gcc_assert (ncopies >= 1);
3721 if (ncopies > 1)
3722 return false; /* FORNOW */
3723
3724 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3725 return false;
3726
3727 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
3728
3729 if (STMT_VINFO_LIVE_P (stmt_info))
3730 {
3731 /* FORNOW: not yet supported. */
3732 if (vect_print_dump_info (REPORT_DETAILS))
3733 fprintf (vect_dump, "value used after loop.");
3734 return false;
3735 }
3736
3737 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3738 return false;
3739
3740 op = GIMPLE_STMT_OPERAND (stmt, 1);
3741
3742 if (TREE_CODE (op) != COND_EXPR)
3743 return false;
3744
3745 cond_expr = TREE_OPERAND (op, 0);
3746 then_clause = TREE_OPERAND (op, 1);
3747 else_clause = TREE_OPERAND (op, 2);
3748
3749 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3750 return false;
3751
3752 /* We do not handle two different vector types for the condition
3753 and the values. */
3754 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3755 return false;
3756
3757 if (TREE_CODE (then_clause) == SSA_NAME)
3758 {
3759 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3760 if (!vect_is_simple_use (then_clause, loop_vinfo,
3761 &then_def_stmt, &def, &dt))
3762 return false;
3763 }
3764 else if (TREE_CODE (then_clause) != INTEGER_CST
3765 && TREE_CODE (then_clause) != REAL_CST)
3766 return false;
3767
3768 if (TREE_CODE (else_clause) == SSA_NAME)
3769 {
3770 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3771 if (!vect_is_simple_use (else_clause, loop_vinfo,
3772 &else_def_stmt, &def, &dt))
3773 return false;
3774 }
3775 else if (TREE_CODE (else_clause) != INTEGER_CST
3776 && TREE_CODE (else_clause) != REAL_CST)
3777 return false;
3778
3779
3780 vec_mode = TYPE_MODE (vectype);
3781
3782 if (!vec_stmt)
3783 {
3784 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3785 return expand_vec_cond_expr_p (op, vec_mode);
3786 }
3787
3788 /* Transform */
3789
3790 /* Handle def. */
3791 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3792 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3793
3794 /* Handle cond expr. */
3795 vec_cond_lhs =
3796 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3797 vec_cond_rhs =
3798 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3799 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3800 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3801
3802   /* Arguments are ready.  Create the new vector stmt.  */
3803 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3804 vec_cond_lhs, vec_cond_rhs);
3805 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3806 vec_compare, vec_then_clause, vec_else_clause);
3807
3808 *vec_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, vec_dest,
3809 vec_cond_expr);
3810 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3811 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3812 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3813
3814 return true;
3815 }
3816
3817 /* Function vect_transform_stmt.
3818
3819 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3820
3821 bool
3822 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
3823 {
3824 bool is_store = false;
3825 tree vec_stmt = NULL_TREE;
3826 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3827 tree orig_stmt_in_pattern;
3828 bool done;
3829
3830 if (STMT_VINFO_RELEVANT_P (stmt_info))
3831 {
3832 switch (STMT_VINFO_TYPE (stmt_info))
3833 {
3834 case type_demotion_vec_info_type:
3835 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
3836 gcc_assert (done);
3837 break;
3838
3839 case type_promotion_vec_info_type:
3840 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
3841 gcc_assert (done);
3842 break;
3843
3844 case op_vec_info_type:
3845 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3846 gcc_assert (done);
3847 break;
3848
3849 case assignment_vec_info_type:
3850 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3851 gcc_assert (done);
3852 break;
3853
3854 case load_vec_info_type:
3855 done = vectorizable_load (stmt, bsi, &vec_stmt);
3856 gcc_assert (done);
3857 break;
3858
3859 case store_vec_info_type:
3860 done = vectorizable_store (stmt, bsi, &vec_stmt);
3861 gcc_assert (done);
3862 if (DR_GROUP_FIRST_DR (stmt_info))
3863 {
3864 /* In case of interleaving, the whole chain is vectorized when the
3865 last store in the chain is reached. Store stmts before the last
3866 	     one are skipped, and their stmt_vec_info shouldn't be freed
3867 	     meanwhile.  */
3868 *strided_store = true;
3869 if (STMT_VINFO_VEC_STMT (stmt_info))
3870 is_store = true;
3871 }
3872 else
3873 is_store = true;
3874 break;
3875
3876 case condition_vec_info_type:
3877 done = vectorizable_condition (stmt, bsi, &vec_stmt);
3878 gcc_assert (done);
3879 break;
3880
3881 case call_vec_info_type:
3882 done = vectorizable_call (stmt, bsi, &vec_stmt);
3883 break;
3884
3885 default:
3886 if (vect_print_dump_info (REPORT_DETAILS))
3887 fprintf (vect_dump, "stmt not supported.");
3888 gcc_unreachable ();
3889 }
3890
3891 gcc_assert (vec_stmt || *strided_store);
3892 if (vec_stmt)
3893 {
3894 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3895 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
3896 if (orig_stmt_in_pattern)
3897 {
3898 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
3899 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3900 {
3901 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3902
3903 /* STMT was inserted by the vectorizer to replace a
3904 computation idiom. ORIG_STMT_IN_PATTERN is a stmt in the
3905 original sequence that computed this idiom. We need to
3906 record a pointer to VEC_STMT in the stmt_info of
3907 ORIG_STMT_IN_PATTERN. See more details in the
3908 documentation of vect_pattern_recog. */
3909
3910 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
3911 }
3912 }
3913 }
3914 }
3915
3916 if (STMT_VINFO_LIVE_P (stmt_info))
3917 {
3918 switch (STMT_VINFO_TYPE (stmt_info))
3919 {
3920 case reduc_vec_info_type:
3921 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
3922 gcc_assert (done);
3923 break;
3924
3925 default:
3926 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
3927 gcc_assert (done);
3928 }
3929 }
3930
3931 return is_store;
3932 }
3933
3934
3935 /* This function builds ni_name = number of iterations the loop executes,
3936    and inserts the computation on the loop preheader edge.  */
3937
3938 static tree
3939 vect_build_loop_niters (loop_vec_info loop_vinfo)
3940 {
3941 tree ni_name, stmt, var;
3942 edge pe;
3943 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3944 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3945
3946 var = create_tmp_var (TREE_TYPE (ni), "niters");
3947 add_referenced_var (var);
3948 ni_name = force_gimple_operand (ni, &stmt, false, var);
3949
3950 pe = loop_preheader_edge (loop);
3951 if (stmt)
3952 {
3953 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3954 gcc_assert (!new_bb);
3955 }
3956
3957 return ni_name;
3958 }
3959
3960
3961 /* This function generates the following statements:
3962
3963    ni_name = number of iterations the loop executes
3964 ratio = ni_name / vf
3965 ratio_mult_vf_name = ratio * vf
3966
3967 and places them at the loop preheader edge. */
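/* A worked example with illustrative values: with ni_name = 103 and
   vf = 4, the shifts below compute ratio = 103 >> 2 = 25 and
   ratio_mult_vf_name = 25 << 2 = 100; the epilog loop then handles the
   remaining 103 - 100 = 3 iterations.  */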
3968
3969 static void
3970 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3971 tree *ni_name_ptr,
3972 tree *ratio_mult_vf_name_ptr,
3973 tree *ratio_name_ptr)
3974 {
3975
3976 edge pe;
3977 basic_block new_bb;
3978 tree stmt, ni_name;
3979 tree var;
3980 tree ratio_name;
3981 tree ratio_mult_vf_name;
3982 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3983 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3984 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3985 tree log_vf;
3986
3987 pe = loop_preheader_edge (loop);
3988
3989   /* Generate a temporary variable that contains the number of iterations
3990      the loop executes.  */
3991
3992 ni_name = vect_build_loop_niters (loop_vinfo);
3993 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
3994
3995 /* Create: ratio = ni >> log2(vf) */
3996
3997 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
3998 if (!is_gimple_val (ratio_name))
3999 {
4000 var = create_tmp_var (TREE_TYPE (ni), "bnd");
4001 add_referenced_var (var);
4002
4003 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
4004 pe = loop_preheader_edge (loop);
4005 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4006 gcc_assert (!new_bb);
4007 }
4008
4009 /* Create: ratio_mult_vf = ratio << log2 (vf). */
4010
4011 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
4012 ratio_name, log_vf);
4013 if (!is_gimple_val (ratio_mult_vf_name))
4014 {
4015 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
4016 add_referenced_var (var);
4017
4018 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
4019 true, var);
4020 pe = loop_preheader_edge (loop);
4021 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4022 gcc_assert (!new_bb);
4023 }
4024
4025 *ni_name_ptr = ni_name;
4026 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
4027 *ratio_name_ptr = ratio_name;
4028
4029 return;
4030 }
4031
4032
4033 /* Function update_vuses_to_preheader.
4034
4035 Input:
4036 STMT - a statement with potential VUSEs.
4037 LOOP - the loop whose preheader will contain STMT.
4038
4039 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
4040 appears to be defined in a VDEF in another statement in a loop.
4041     One such case is when the VUSE is at the dereference of a __restrict__
4042     pointer in a load and the VDEF is at the dereference of a different
4043     __restrict__ pointer in a store.  Vectorization may result in
4044     copy_virtual_operands being called to copy the problematic VUSE to a new
4045 statement that is being inserted in the loop preheader. This procedure
4046 is called to change the SSA_NAME in the new statement's VUSE from the
4047 SSA_NAME updated in the loop to the related SSA_NAME available on the
4048 path entering the loop.
4049
4050 When this function is called, we have the following situation:
4051
4052 # vuse <name1>
4053 S1: vload
4054 do {
4055 # name1 = phi < name0 , name2>
4056
4057 # vuse <name1>
4058 S2: vload
4059
4060 # name2 = vdef <name1>
4061 S3: vstore
4062
4063 }while...
4064
4065 Stmt S1 was created in the loop preheader block as part of misaligned-load
4066 handling. This function fixes the name of the vuse of S1 from 'name1' to
4067 'name0'. */
4068
4069 static void
4070 update_vuses_to_preheader (tree stmt, struct loop *loop)
4071 {
4072 basic_block header_bb = loop->header;
4073 edge preheader_e = loop_preheader_edge (loop);
4074 ssa_op_iter iter;
4075 use_operand_p use_p;
4076
4077 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
4078 {
4079 tree ssa_name = USE_FROM_PTR (use_p);
4080 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
4081 tree name_var = SSA_NAME_VAR (ssa_name);
4082 basic_block bb = bb_for_stmt (def_stmt);
4083
4084 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
4085 if (!IS_EMPTY_STMT (def_stmt)
4086 && flow_bb_inside_loop_p (loop, bb))
4087 {
4088 /* If the block containing the statement defining the SSA_NAME
4089 is in the loop then it's necessary to find the definition
4090 outside the loop using the PHI nodes of the header. */
4091 tree phi;
4092 bool updated = false;
4093
4094 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
4095 {
4096 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
4097 {
4098 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
4099 updated = true;
4100 break;
4101 }
4102 }
4103 gcc_assert (updated);
4104 }
4105 }
4106 }
4107
4108
4109 /* Function vect_update_ivs_after_vectorizer.
4110
4111 "Advance" the induction variables of LOOP to the value they should take
4112 after the execution of LOOP. This is currently necessary because the
4113 vectorizer does not handle induction variables that are used after the
4114 loop. Such a situation occurs when the last iterations of LOOP are
4115 peeled, because:
4116 1. We introduced new uses after LOOP for IVs that were not originally used
4117 after LOOP: the IVs of LOOP are now used by an epilog loop.
4118 2. LOOP is going to be vectorized; this means that it will iterate N/VF
4119 times, whereas the loop IVs should be bumped N times.
4120
4121 Input:
4122 - LOOP - a loop that is going to be vectorized. The last few iterations
4123 of LOOP were peeled.
4124 - NITERS - the number of iterations that LOOP executes (before it is
4125 vectorized). i.e, the number of times the ivs should be bumped.
4126 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
4127 coming out from LOOP on which there are uses of the LOOP ivs
4128 (this is the path from LOOP->exit to epilog_loop->preheader).
4129
4130 The new definitions of the ivs are placed in LOOP->exit.
4131 The phi args associated with the edge UPDATE_E in the bb
4132 UPDATE_E->dest are updated accordingly.
4133
4134 Assumption 1: Like the rest of the vectorizer, this function assumes
4135 a single loop exit that has a single predecessor.
4136
4137 Assumption 2: The phi nodes in the LOOP header and in update_bb are
4138 organized in the same order.
4139
4140 Assumption 3: The access function of the ivs is simple enough (see
4141 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
4142
4143 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
4144 coming out of LOOP on which the ivs of LOOP are used (this is the path
4145 that leads to the epilog loop; other paths skip the epilog loop). This
4146 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
4147 needs to have its phis updated.
4148 */
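/* A worked example of the "advance" computed below (illustrative values
   only): an IV with initial value 10 and step 3 in a loop that originally
   executed NITERS = 100 iterations is advanced to 10 + 100 * 3 = 310,
   which is exactly the "ni" expression built in the loop below
   (init_expr + niters * step_expr).  */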
4149
4150 static void
4151 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
4152 edge update_e)
4153 {
4154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4155 basic_block exit_bb = single_exit (loop)->dest;
4156 tree phi, phi1;
4157 basic_block update_bb = update_e->dest;
4158
4159 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
4160
4161 /* Make sure there exists a single-predecessor exit bb: */
4162 gcc_assert (single_pred_p (exit_bb));
4163
4164 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
4165 phi && phi1;
4166 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
4167 {
4168 tree access_fn = NULL;
4169 tree evolution_part;
4170 tree init_expr;
4171 tree step_expr;
4172 tree var, stmt, ni, ni_name;
4173 block_stmt_iterator last_bsi;
4174
4175 if (vect_print_dump_info (REPORT_DETAILS))
4176 {
4177 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
4178 print_generic_expr (vect_dump, phi, TDF_SLIM);
4179 }
4180
4181       /* Skip virtual phis.  */
4182 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4183 {
4184 if (vect_print_dump_info (REPORT_DETAILS))
4185 fprintf (vect_dump, "virtual phi. skip.");
4186 continue;
4187 }
4188
4189 /* Skip reduction phis. */
4190 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
4191 {
4192 if (vect_print_dump_info (REPORT_DETAILS))
4193 fprintf (vect_dump, "reduc phi. skip.");
4194 continue;
4195 }
4196
4197 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
4198 gcc_assert (access_fn);
4199 evolution_part =
4200 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
4201 gcc_assert (evolution_part != NULL_TREE);
4202
4203 /* FORNOW: We do not support IVs whose evolution function is a polynomial
4204 of degree >= 2 or exponential. */
4205 gcc_assert (!tree_is_chrec (evolution_part));
4206
4207 step_expr = evolution_part;
4208 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
4209 loop->num));
4210
4211 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
4212 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
4213 fold_convert (TREE_TYPE (init_expr),
4214 niters),
4215 step_expr),
4216 init_expr);
4217
4218 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
4219 add_referenced_var (var);
4220
4221 ni_name = force_gimple_operand (ni, &stmt, false, var);
4222
4223 /* Insert stmt into exit_bb. */
4224 last_bsi = bsi_last (exit_bb);
4225 if (stmt)
4226 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
4227
4228 /* Fix phi expressions in the successor bb. */
4229 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
4230 }
4231 }
4232
4233
4234 /* Function vect_do_peeling_for_loop_bound
4235
4236 Peel the last iterations of the loop represented by LOOP_VINFO.
4237 The peeled iterations form a new epilog loop. Given that the loop now
4238 iterates NITERS times, the new epilog loop iterates
4239 NITERS % VECTORIZATION_FACTOR times.
4240
4241 The original loop will later be made to iterate
4242 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
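/* For example, with NITERS = 103 and VECTORIZATION_FACTOR = 4, the
   vectorized loop executes 103 / 4 = 25 iterations (this is RATIO) and
   the peeled epilog loop executes the remaining 103 % 4 = 3 scalar
   iterations.  */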
4243
4244 static void
4245 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
4246 {
4247 tree ni_name, ratio_mult_vf_name;
4248 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4249 struct loop *new_loop;
4250 edge update_e;
4251 basic_block preheader;
4252 int loop_num;
4253
4254 if (vect_print_dump_info (REPORT_DETAILS))
4255 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
4256
4257 initialize_original_copy_tables ();
4258
4259 /* Generate the following variables on the preheader of original loop:
4260
4261      ni_name = number of iterations the original loop executes
4262 ratio = ni_name / vf
4263 ratio_mult_vf_name = ratio * vf */
4264 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
4265 &ratio_mult_vf_name, ratio);
4266
4267 loop_num = loop->num;
4268 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
4269 ratio_mult_vf_name, ni_name, false);
4270 gcc_assert (new_loop);
4271 gcc_assert (loop_num == loop->num);
4272 #ifdef ENABLE_CHECKING
4273 slpeel_verify_cfg_after_peeling (loop, new_loop);
4274 #endif
4275
4276 /* A guard that controls whether the new_loop is to be executed or skipped
4277 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
4278 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
4279 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
4280 is on the path where the LOOP IVs are used and need to be updated. */
4281
4282 preheader = loop_preheader_edge (new_loop)->src;
4283 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
4284 update_e = EDGE_PRED (preheader, 0);
4285 else
4286 update_e = EDGE_PRED (preheader, 1);
4287
4288 /* Update IVs of original loop as if they were advanced
4289 by ratio_mult_vf_name steps. */
4290 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
4291
4292 /* After peeling we have to reset scalar evolution analyzer. */
4293 scev_reset ();
4294
4295 free_original_copy_tables ();
4296 }
4297
4298
4299 /* Function vect_gen_niters_for_prolog_loop
4300
4301 Set the number of iterations for the loop represented by LOOP_VINFO
4302 to the minimum between LOOP_NITERS (the original iteration count of the loop)
4303 and the misalignment of DR - the data reference recorded in
4304 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
4305 this loop, the data reference DR will refer to an aligned location.
4306
4307 The following computation is generated:
4308
4309 If the misalignment of DR is known at compile time:
4310 addr_mis = int mis = DR_MISALIGNMENT (dr);
4311 Else, compute address misalignment in bytes:
4312 addr_mis = addr & (vectype_size - 1)
4313
4314 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
4315
4316 (elem_size = element type size; an element is the scalar element
4317 whose type is the inner type of the vectype)
4318
4319 For interleaving,
4320
4321 prolog_niters = min ( LOOP_NITERS ,
4322 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
4323 where group_size is the size of the interleaved group.
4324 */
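/* A worked example of the computation above (illustrative values only):
   with VF = 4, a 4-byte element type and an address misalignment of
   8 bytes (so addr_mis/elem_size = 2), and no interleaving (group_size = 1),
   the prolog executes (4 - 2) & 3 = 2 scalar iterations; the access then
   advances 2 * 4 = 8 bytes, after which DR is aligned on the 16-byte
   vector boundary.  */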
4325
4326 static tree
4327 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
4328 {
4329 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
4330 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4332 tree var, stmt;
4333 tree iters, iters_name;
4334 edge pe;
4335 basic_block new_bb;
4336 tree dr_stmt = DR_STMT (dr);
4337 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
4338 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4339 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
4340 tree niters_type = TREE_TYPE (loop_niters);
4341 int group_size = 1;
4342 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
4343
4344 if (DR_GROUP_FIRST_DR (stmt_info))
4345 {
4346 /* For interleaved access element size must be multiplied by the size of
4347 the interleaved group. */
4348 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
4349 DR_GROUP_FIRST_DR (stmt_info)));
4350 element_size *= group_size;
4351 }
4352
4353 pe = loop_preheader_edge (loop);
4354
4355 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
4356 {
4357 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
4358 int elem_misalign = byte_misalign / element_size;
4359
4360 if (vect_print_dump_info (REPORT_DETAILS))
4361 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
4362 iters = build_int_cst (niters_type,
4363 (vf - elem_misalign)&(vf/group_size-1));
4364 }
4365 else
4366 {
4367 tree new_stmts = NULL_TREE;
4368 tree start_addr =
4369 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
4370 tree ptr_type = TREE_TYPE (start_addr);
4371 tree size = TYPE_SIZE (ptr_type);
4372 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
4373 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
4374 tree elem_size_log =
4375 build_int_cst (type, exact_log2 (vectype_align/vf));
4376 tree vf_minus_1 = build_int_cst (type, vf - 1);
4377 tree vf_tree = build_int_cst (type, vf);
4378 tree byte_misalign;
4379 tree elem_misalign;
4380
4381 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
4382 gcc_assert (!new_bb);
4383
4384 /* Create: byte_misalign = addr & (vectype_size - 1) */
4385 byte_misalign =
4386 fold_build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
4387
4388 /* Create: elem_misalign = byte_misalign / element_size */
4389 elem_misalign =
4390 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
4391
4392 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
4393 iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
4394 iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
4395 iters = fold_convert (niters_type, iters);
4396 }
4397
4398 /* Create: prolog_loop_niters = min (iters, loop_niters) */
4399 /* If the loop bound is known at compile time we already verified that it is
4400 greater than vf; since the misalignment ('iters') is at most vf, there's
4401 no need to generate the MIN_EXPR in this case. */
4402 if (TREE_CODE (loop_niters) != INTEGER_CST)
4403 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
4404
4405 if (vect_print_dump_info (REPORT_DETAILS))
4406 {
4407 fprintf (vect_dump, "niters for prolog loop: ");
4408 print_generic_expr (vect_dump, iters, TDF_SLIM);
4409 }
4410
4411 var = create_tmp_var (niters_type, "prolog_loop_niters");
4412 add_referenced_var (var);
4413 iters_name = force_gimple_operand (iters, &stmt, false, var);
4414
4415 /* Insert stmt on loop preheader edge. */
4416 if (stmt)
4417 {
4418 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
4419 gcc_assert (!new_bb);
4420 }
4421
4422 return iters_name;
4423 }
4424
4425
4426 /* Function vect_update_init_of_dr
4427
4428 NITERS iterations were peeled from LOOP. DR represents a data reference
4429 in LOOP. This function updates the information recorded in DR to
4430 account for the fact that the first NITERS iterations had already been
4431 executed. Specifically, it updates the OFFSET field of DR. */
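/* For example, if DR has DR_STEP = 4 (bytes) and NITERS = 2 iterations
   were peeled, the offset grows by 2 * 4 = 8 below, so the access
   function now starts 8 bytes further into the data.  */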
4432
4433 static void
4434 vect_update_init_of_dr (struct data_reference *dr, tree niters)
4435 {
4436 tree offset = DR_OFFSET (dr);
4437
4438 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
4439 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
4440 DR_OFFSET (dr) = offset;
4441 }
4442
4443
4444 /* Function vect_update_inits_of_drs
4445
4446 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
4447 This function updates the information recorded for the data references in
4448 the loop to account for the fact that the first NITERS iterations had
4449 already been executed. Specifically, it updates the initial_condition of the
4450 access_function of all the data_references in the loop. */
4451
4452 static void
4453 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
4454 {
4455 unsigned int i;
4456 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
4457 struct data_reference *dr;
4458
4459 if (vect_dump && (dump_flags & TDF_DETAILS))
4460     fprintf (vect_dump, "=== vect_update_inits_of_drs ===");
4461
4462 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4463 vect_update_init_of_dr (dr, niters);
4464 }
4465
4466
4467 /* Function vect_do_peeling_for_alignment
4468
4469 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
4470 'niters' is set to the misalignment of one of the data references in the
4471 loop, thereby forcing it to refer to an aligned location at the beginning
4472 of the execution of this loop. The data reference for which we are
4473 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
4474
4475 static void
4476 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
4477 {
4478 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4479 tree niters_of_prolog_loop, ni_name;
4480 tree n_iters;
4481 struct loop *new_loop;
4482
4483 if (vect_print_dump_info (REPORT_DETAILS))
4484 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
4485
4486 initialize_original_copy_tables ();
4487
4488 ni_name = vect_build_loop_niters (loop_vinfo);
4489 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
4490
4491   /* Peel the prolog loop and iterate it niters_of_prolog_loop times.  */
4492 new_loop =
4493 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
4494 niters_of_prolog_loop, ni_name, true);
4495 gcc_assert (new_loop);
4496 #ifdef ENABLE_CHECKING
4497 slpeel_verify_cfg_after_peeling (new_loop, loop);
4498 #endif
4499
4500 /* Update number of times loop executes. */
4501 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
4502 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
4503 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
4504
4505 /* Update the init conditions of the access functions of all data refs. */
4506 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
4507
4508 /* After peeling we have to reset scalar evolution analyzer. */
4509 scev_reset ();
4510
4511 free_original_copy_tables ();
4512 }
4513
4514
4515 /* Function vect_create_cond_for_align_checks.
4516
4517 Create a conditional expression that represents the alignment checks for
4518 all of data references (array element references) whose alignment must be
4519 checked at runtime.
4520
4521 Input:
4522 LOOP_VINFO - two fields of the loop information are used.
4523 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
4524 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
4525
4526 Output:
4527 COND_EXPR_STMT_LIST - statements needed to construct the conditional
4528 expression.
4529 The returned value is the conditional expression to be used in the if
4530 statement that controls which version of the loop gets executed at runtime.
4531
4532 The algorithm makes two assumptions:
4533 1) The number of bytes "n" in a vector is a power of 2.
4534 2) An address "a" is aligned if a%n is zero and that this
4535 test can be done as a&(n-1) == 0. For example, for 16
4536 byte vectors the test is a&0xf == 0. */
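/* To illustrate the generated test with example addresses: for 16-byte
   vectors (mask = 0xf) and two data refs whose first-vector addresses
   evaluate to 0x1000 and 0x1008 at runtime, the test computes
   (0x1000 | 0x1008) & 0xf = 0x8 != 0, so the condition is false and the
   scalar version of the loop is selected.  */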
4537
4538 static tree
4539 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
4540 tree *cond_expr_stmt_list)
4541 {
4542 VEC(tree,heap) *may_misalign_stmts
4543 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
4544 tree ref_stmt;
4545 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
4546 tree mask_cst;
4547 unsigned int i;
4548 tree psize;
4549 tree int_ptrsize_type;
4550 char tmp_name[20];
4551 tree or_tmp_name = NULL_TREE;
4552 tree and_tmp, and_tmp_name, and_stmt;
4553 tree ptrsize_zero;
4554
4555 /* Check that mask is one less than a power of 2, i.e., mask is
4556 all zeros followed by all ones. */
4557 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
4558
4559 /* CHECKME: what is the best integer or unsigned type to use to hold a
4560 cast from a pointer value? */
4561 psize = TYPE_SIZE (ptr_type_node);
4562 int_ptrsize_type
4563 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
4564
4565   /* Create the expression (mask & (dr_1 | ... | dr_n)), where dr_i is the
4566      address of the first vector of the i'th data reference.  */
4567
4568 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
4569 {
4570 tree new_stmt_list = NULL_TREE;
4571 tree addr_base;
4572 tree addr_tmp, addr_tmp_name, addr_stmt;
4573 tree or_tmp, new_or_tmp_name, or_stmt;
4574
4575 /* create: addr_tmp = (int)(address_of_first_vector) */
4576 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
4577 &new_stmt_list,
4578 NULL_TREE);
4579
4580 if (new_stmt_list != NULL_TREE)
4581 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
4582
4583 sprintf (tmp_name, "%s%d", "addr2int", i);
4584 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
4585 add_referenced_var (addr_tmp);
4586 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
4587 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
4588 addr_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
4589 addr_tmp_name, addr_stmt);
4590 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
4591 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
4592
4593       /* The addresses are ORed together.  */
4594
4595 if (or_tmp_name != NULL_TREE)
4596 {
4597 /* create: or_tmp = or_tmp | addr_tmp */
4598 sprintf (tmp_name, "%s%d", "orptrs", i);
4599 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
4600 add_referenced_var (or_tmp);
4601 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
4602 or_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
4603 new_or_tmp_name,
4604 build2 (BIT_IOR_EXPR, int_ptrsize_type,
4605 or_tmp_name,
4606 addr_tmp_name));
4607 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
4608 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
4609 or_tmp_name = new_or_tmp_name;
4610 }
4611 else
4612 or_tmp_name = addr_tmp_name;
4613
4614 } /* end for i */
4615
4616 mask_cst = build_int_cst (int_ptrsize_type, mask);
4617
4618 /* create: and_tmp = or_tmp & mask */
4619 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
4620 add_referenced_var (and_tmp);
4621 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
4622
4623 and_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node,
4624 and_tmp_name,
4625 build2 (BIT_AND_EXPR, int_ptrsize_type,
4626 or_tmp_name, mask_cst));
4627 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
4628 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
4629
4630 /* Make and_tmp the left operand of the conditional test against zero.
4631      If and_tmp has a nonzero bit then some address is unaligned.  */
4632 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
4633 return build2 (EQ_EXPR, boolean_type_node,
4634 and_tmp_name, ptrsize_zero);
4635 }
4636
4637
4638 /* Function vect_transform_loop.
4639
4640 The analysis phase has determined that the loop is vectorizable.
4641    Vectorize the loop - create vectorized stmts to replace the scalar
4642 stmts in the loop, and update the loop exit condition. */
4643
4644 void
4645 vect_transform_loop (loop_vec_info loop_vinfo)
4646 {
4647 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4648 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4649 int nbbs = loop->num_nodes;
4650 block_stmt_iterator si;
4651 int i;
4652 tree ratio = NULL;
4653 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4654 bool strided_store;
4655
4656 if (vect_print_dump_info (REPORT_DETAILS))
4657 fprintf (vect_dump, "=== vec_transform_loop ===");
4658
4659 /* If the loop has data references that may or may not be aligned then
4660 two versions of the loop need to be generated, one which is vectorized
4661 and one which isn't. A test is then generated to control which of the
4662 loops is executed. The test checks for the alignment of all of the
4663 data references that may or may not be aligned. */
4664
4665 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
4666 {
4667 struct loop *nloop;
4668 tree cond_expr;
4669 tree cond_expr_stmt_list = NULL_TREE;
4670 basic_block condition_bb;
4671 block_stmt_iterator cond_exp_bsi;
4672 basic_block merge_bb;
4673 basic_block new_exit_bb;
4674 edge new_exit_e, e;
4675 tree orig_phi, new_phi, arg;
4676
4677 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
4678 &cond_expr_stmt_list);
4679 initialize_original_copy_tables ();
4680 nloop = loop_version (loop, cond_expr, &condition_bb, true);
4681 free_original_copy_tables();
4682
4683 /** Loop versioning violates an assumption we try to maintain during
4684 vectorization - that the loop exit block has a single predecessor.
4685 After versioning, the exit block of both loop versions is the same
4686 basic block (i.e. it has two predecessors). Just in order to simplify
4687 following transformations in the vectorizer, we fix this situation
4688 here by adding a new (empty) block on the exit-edge of the loop,
4689 with the proper loop-exit phis to maintain loop-closed-form. **/
4690
4691 merge_bb = single_exit (loop)->dest;
4692 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
4693 new_exit_bb = split_edge (single_exit (loop));
4694 new_exit_e = single_exit (loop);
4695 e = EDGE_SUCC (new_exit_bb, 0);
4696
4697 for (orig_phi = phi_nodes (merge_bb); orig_phi;
4698 orig_phi = PHI_CHAIN (orig_phi))
4699 {
4700 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
4701 new_exit_bb);
4702 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
4703 add_phi_arg (new_phi, arg, new_exit_e);
4704 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
4705 }
4706
4707 /** end loop-exit-fixes after versioning **/
4708
4709 update_ssa (TODO_update_ssa);
4710 cond_exp_bsi = bsi_last (condition_bb);
4711 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
4712 }
4713
4714 /* CHECKME: we wouldn't need this if we called update_ssa once
4715 for all loops. */
4716 bitmap_zero (vect_memsyms_to_rename);
4717
4718 /* Peel the loop if there are data refs with unknown alignment.
4719      Only one data ref with unknown alignment is allowed.  */
4720
4721 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
4722 vect_do_peeling_for_alignment (loop_vinfo);
4723
4724 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
4725      compile time constant), or it is a constant that is not divisible by the
4726 vectorization factor, then an epilog loop needs to be created.
4727 We therefore duplicate the loop: the original loop will be vectorized,
4728 and will compute the first (n/VF) iterations. The second copy of the loop
4729 will remain scalar and will compute the remaining (n%VF) iterations.
4730 (VF is the vectorization factor). */
4731
4732 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4733 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4734 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
4735 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
4736 else
4737 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
4738 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
4739
4740 /* 1) Make sure the loop header has exactly two entries
4741 2) Make sure we have a preheader basic block. */
4742
4743 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
4744
4745 split_edge (loop_preheader_edge (loop));
4746
4747   /* FORNOW: the vectorizer supports only loops whose body consists of one
4748      basic block (header + empty latch).  When the vectorizer supports more
4749      involved loop forms, the order by which the BBs are traversed will
4750      need to be reconsidered.  */
4751
4752 for (i = 0; i < nbbs; i++)
4753 {
4754 basic_block bb = bbs[i];
4755
4756 for (si = bsi_start (bb); !bsi_end_p (si);)
4757 {
4758 tree stmt = bsi_stmt (si);
4759 stmt_vec_info stmt_info;
4760 bool is_store;
4761
4762 if (vect_print_dump_info (REPORT_DETAILS))
4763 {
4764 fprintf (vect_dump, "------>vectorizing statement: ");
4765 print_generic_expr (vect_dump, stmt, TDF_SLIM);
4766 }
4767 stmt_info = vinfo_for_stmt (stmt);
4768 gcc_assert (stmt_info);
4769 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4770 && !STMT_VINFO_LIVE_P (stmt_info))
4771 {
4772 bsi_next (&si);
4773 continue;
4774 }
4775
4776 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
4777 != (unsigned HOST_WIDE_INT) vectorization_factor)
4778 && vect_print_dump_info (REPORT_DETAILS))
4779 fprintf (vect_dump, "multiple-types.");
4780
4781 /* -------- vectorize statement ------------ */
4782 if (vect_print_dump_info (REPORT_DETAILS))
4783 fprintf (vect_dump, "transform statement.");
4784
4785 strided_store = false;
4786 is_store = vect_transform_stmt (stmt, &si, &strided_store);
4787 if (is_store)
4788 {
4789 stmt_ann_t ann;
4790 if (DR_GROUP_FIRST_DR (stmt_info))
4791 {
4792 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
4793 interleaving chain was completed - free all the stores in
4794 the chain. */
4795 tree next = DR_GROUP_FIRST_DR (stmt_info);
4796 tree tmp;
4797 stmt_vec_info next_stmt_info;
4798
4799 while (next)
4800 {
4801 next_stmt_info = vinfo_for_stmt (next);
4802 /* Free the attached stmt_vec_info and remove the stmt. */
4803 ann = stmt_ann (next);
4804 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
4805 free (next_stmt_info);
4806 set_stmt_info (ann, NULL);
4807 next = tmp;
4808 }
4809 bsi_remove (&si, true);
4810 continue;
4811 }
4812 else
4813 {
4814 /* Free the attached stmt_vec_info and remove the stmt. */
4815 ann = stmt_ann (stmt);
4816 free (stmt_info);
4817 set_stmt_info (ann, NULL);
4818 bsi_remove (&si, true);
4819 continue;
4820 }
4821 }
4822 else
4823 {
4824 if (strided_store)
4825 {
4826 /* This is case of skipped interleaved store. We don't free
4827 its stmt_vec_info. */
4828 bsi_remove (&si, true);
4829 continue;
4830 }
4831 }
4832 bsi_next (&si);
4833 } /* stmts in BB */
4834 } /* BBs in loop */
4835
4836 slpeel_make_loop_iterate_ntimes (loop, ratio);
4837
4838 mark_set_for_renaming (vect_memsyms_to_rename);
4839
4840 /* The memory tags and pointers in vectorized statements need to
4841 have their SSA forms updated. FIXME, why can't this be delayed
4842 until all the loops have been transformed? */
4843 update_ssa (TODO_update_ssa);
4844
4845 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
4846 fprintf (vect_dump, "LOOP VECTORIZED.");
4847 }