28c657a422a7aa7d14c1fd202dceac26eb9d9048
[gcc.git] / gcc / tree-vect-patterns.c
1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "params.h"
37 #include "tree-data-ref.h"
38 #include "tree-vectorizer.h"
39 #include "recog.h"
40 #include "diagnostic-core.h"
41
/* Pattern recognition functions.  Each takes the vector of statements to
   search from (and may push additional original statements onto it) and
   reports the pattern's input/output types through the two TREE out
   parameters; each returns the new pattern statement, or NULL if the
   pattern was not recognized.  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
						tree *);
/* Table of all pattern recognizers; must contain exactly NUM_PATTERNS
   entries.  */
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_over_widening_pattern};
58
59
60 /* Function widened_name_p
61
62 Check whether NAME, an ssa-name used in USE_STMT,
63 is a result of a type-promotion, such that:
64 DEF_STMT: NAME = NOP (name0)
65 where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
66 If CHECK_SIGN is TRUE, check that either both types are signed or both are
67 unsigned. */
68
69 static bool
70 widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
71 bool check_sign)
72 {
73 tree dummy;
74 gimple dummy_gimple;
75 loop_vec_info loop_vinfo;
76 stmt_vec_info stmt_vinfo;
77 tree type = TREE_TYPE (name);
78 tree oprnd0;
79 enum vect_def_type dt;
80 tree def;
81
82 stmt_vinfo = vinfo_for_stmt (use_stmt);
83 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
84
85 if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))
86 return false;
87
88 if (dt != vect_internal_def
89 && dt != vect_external_def && dt != vect_constant_def)
90 return false;
91
92 if (! *def_stmt)
93 return false;
94
95 if (!is_gimple_assign (*def_stmt))
96 return false;
97
98 if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)
99 return false;
100
101 oprnd0 = gimple_assign_rhs1 (*def_stmt);
102
103 *half_type = TREE_TYPE (oprnd0);
104 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
105 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
106 || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
107 return false;
108
109 if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
110 &dt))
111 return false;
112
113 return true;
114 }
115
116 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
117 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
118
119 static tree
120 vect_recog_temp_ssa_var (tree type, gimple stmt)
121 {
122 tree var = create_tmp_var (type, "patt");
123
124 add_referenced_var (var);
125 var = make_ssa_name (var, stmt);
126 return var;
127 }
128
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */

static gimple
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			     tree *type_out)
{
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  tree var;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  If so, pick up the
         operands from the WIDEN_SUM pattern statement instead.  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple def_stmt;

      /* LAST_STMT itself must be a (plain) summation reduction.  */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      /* If the summed operand is itself a promotion (the optional S6),
         look through it.  */
      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  If so, take the
         narrow operands from the WIDEN_MULT pattern statement.  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      tree half_type0, half_type1;
      gimple def_stmt;
      tree oprnd0, oprnd1;

      /* Verify the multiplication's operands are both promotions from the
         same narrow type of exactly half the product's precision.  */
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
        return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
						oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));

  return pattern_stmt;
}
336
337
/* Handle two cases of multiplication by a constant.  The first one is when
   the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
   operand (OPRND).  In that case, we can perform widen-mult from HALF_TYPE
   to TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
   TYPE), we can perform widen-mult from the intermediate type to TYPE and
   replace a_T = (TYPE) a_t; with a_it = (interm_type) a_t;

   STMT is the multiplication statement, DEF_STMT the promotion defining
   OPRND.  On success, *OPRND is set to the (possibly new) narrow operand,
   *HALF_TYPE to the type actually used for the widen-mult, and, when an
   intermediate conversion is created, DEF_STMT is pushed onto STMTS so the
   caller records it as part of the pattern.  */

static bool
vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
				 VEC (gimple, heap) **stmts, tree type,
				 tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd, tmp;
  gimple new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (int_fits_type_p (const_oprnd, *half_type))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  /* The intermediate-type trick needs TYPE to be at least 4x HALF_TYPE,
     and DEF_STMT must be an analyzable statement inside the loop.  */
  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if (!int_fits_type_p (const_oprnd, new_type))
    return false;

  /* Use NEW_TYPE for widen_mult.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
        return false;

      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  and record it as DEF_STMT's pattern
         statement, so a later call finds and reuses it above.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
407
408
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type a_t, b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
*/

static gimple
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
			       tree *type_in, tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree dummy;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  VEC (tree, heap) *dummy_vec;
  bool op0_ok, op1_ok;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  /* In case of multiplication by a constant one of the operands may not match
     the pattern, but not both.  */
  if (!op0_ok && !op1_ok)
    return NULL;

  if (op0_ok && op1_ok)
    {
      /* Both operands are promotions: use the values before promotion.  */
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else if (!op0_ok)
    {
      /* OPRND0 may be an integer constant multiplied by a promotion.  */
      if (TREE_CODE (oprnd0) == INTEGER_CST
	  && TREE_CODE (half_type1) == INTEGER_TYPE
          && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
		 			      stmts, type,
					      &half_type1, def_stmt1))
        half_type0 = half_type1;
      else
	return NULL;
    }
  else if (!op1_ok)
    {
      /* Symmetric case: constant in OPRND1.  */
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
					      stmts, type,
					      &half_type0, def_stmt0))
        half_type1 = half_type0;
      else
        return NULL;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      int nuses = 0;
      gimple use_stmt = NULL;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      /* The product must have a single (non-debug) use: the S6 cast.  */
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
	  if (is_gimple_debug (USE_STMT (use_p)))
	    continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
        return NULL;

      /* S6 must only change signedness, not precision.  */
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      /* From here on the pattern replaces S6 rather than S5.  */
      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy, &dummy, &dummy_code,
					  &dummy_code, &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported. Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
					       oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  /* Push back the (possibly updated) statement being replaced.  */
  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
}
624
625
626 /* Function vect_recog_pow_pattern
627
628 Try to find the following pattern:
629
630 x = POW (y, N);
631
632 with POW being one of pow, powf, powi, powif and N being
633 either 2 or 0.5.
634
635 Input:
636
637 * LAST_STMT: A stmt from which the pattern search begins.
638
639 Output:
640
641 * TYPE_IN: The type of the input arguments to the pattern.
642
643 * TYPE_OUT: The type of the output of this pattern.
644
645 * Return value: A new stmt that will be used to replace the sequence of
646 stmts that constitute the pattern. In this case it will be:
647 x = x * x
648 or
649 x = sqrt (x)
650 */
651
652 static gimple
653 vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
654 tree *type_out)
655 {
656 gimple last_stmt = VEC_index (gimple, *stmts, 0);
657 tree fn, base, exp = NULL;
658 gimple stmt;
659 tree var;
660
661 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
662 return NULL;
663
664 fn = gimple_call_fndecl (last_stmt);
665 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
666 return NULL;
667
668 switch (DECL_FUNCTION_CODE (fn))
669 {
670 case BUILT_IN_POWIF:
671 case BUILT_IN_POWI:
672 case BUILT_IN_POWF:
673 case BUILT_IN_POW:
674 base = gimple_call_arg (last_stmt, 0);
675 exp = gimple_call_arg (last_stmt, 1);
676 if (TREE_CODE (exp) != REAL_CST
677 && TREE_CODE (exp) != INTEGER_CST)
678 return NULL;
679 break;
680
681 default:
682 return NULL;
683 }
684
685 /* We now have a pow or powi builtin function call with a constant
686 exponent. */
687
688 *type_out = NULL_TREE;
689
690 /* Catch squaring. */
691 if ((host_integerp (exp, 0)
692 && tree_low_cst (exp, 0) == 2)
693 || (TREE_CODE (exp) == REAL_CST
694 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
695 {
696 *type_in = TREE_TYPE (base);
697
698 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
699 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
700 return stmt;
701 }
702
703 /* Catch square root. */
704 if (TREE_CODE (exp) == REAL_CST
705 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
706 {
707 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
708 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
709 if (*type_in)
710 {
711 gimple stmt = gimple_build_call (newfn, 1, base);
712 if (vectorizable_function (stmt, *type_in, *type_in)
713 != NULL_TREE)
714 {
715 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
716 gimple_call_set_lhs (stmt, var);
717 return stmt;
718 }
719 }
720 }
721
722 return NULL;
723 }
724
725
726 /* Function vect_recog_widen_sum_pattern
727
728 Try to find the following pattern:
729
730 type x_t;
731 TYPE x_T, sum = init;
732 loop:
733 sum_0 = phi <init, sum_1>
734 S1 x_t = *p;
735 S2 x_T = (TYPE) x_t;
736 S3 sum_1 = x_T + sum_0;
737
738 where type 'TYPE' is at least double the size of type 'type', i.e - we're
739 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
740 a special case of a reduction computation.
741
742 Input:
743
744 * LAST_STMT: A stmt from which the pattern search begins. In the example,
745 when this function is called with S3, the pattern {S2,S3} will be detected.
746
747 Output:
748
749 * TYPE_IN: The type of the input arguments to the pattern.
750
751 * TYPE_OUT: The type of the output of this pattern.
752
753 * Return value: A new stmt that will be used to replace the sequence of
754 stmts that constitute the pattern. In this case it will be:
755 WIDEN_SUM <x_t, sum_0>
756
757 Note: The widening-sum idiom is a widening reduction pattern that is
758 vectorized without preserving all the intermediate results. It
759 produces only N/2 (widened) results (by summing up pairs of
760 intermediate results) rather than all N results. Therefore, we
761 cannot allow this pattern when we want to get all the results and in
762 the correct order (as is the case when this computation is in an
763 inner-loop nested in an outer-loop that us being vectorized). */
764
765 static gimple
766 vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
767 tree *type_out)
768 {
769 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
770 tree oprnd0, oprnd1;
771 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
772 tree type, half_type;
773 gimple pattern_stmt;
774 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
775 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
776 tree var;
777
778 if (!is_gimple_assign (last_stmt))
779 return NULL;
780
781 type = gimple_expr_type (last_stmt);
782
783 /* Look for the following pattern
784 DX = (TYPE) X;
785 sum_1 = DX + sum_0;
786 In which DX is at least double the size of X, and sum_1 has been
787 recognized as a reduction variable.
788 */
789
790 /* Starting from LAST_STMT, follow the defs of its uses in search
791 of the above pattern. */
792
793 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
794 return NULL;
795
796 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
797 return NULL;
798
799 oprnd0 = gimple_assign_rhs1 (last_stmt);
800 oprnd1 = gimple_assign_rhs2 (last_stmt);
801 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
802 || !types_compatible_p (TREE_TYPE (oprnd1), type))
803 return NULL;
804
805 /* So far so good. Since last_stmt was detected as a (summation) reduction,
806 we know that oprnd1 is the reduction variable (defined by a loop-header
807 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
808 Left to check that oprnd0 is defined by a cast from type 'type' to type
809 'TYPE'. */
810
811 if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))
812 return NULL;
813
814 oprnd0 = gimple_assign_rhs1 (stmt);
815 *type_in = half_type;
816 *type_out = type;
817
818 /* Pattern detected. Create a stmt to be used to replace the pattern: */
819 var = vect_recog_temp_ssa_var (type, NULL);
820 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
821 oprnd0, oprnd1);
822
823 if (vect_print_dump_info (REPORT_DETAILS))
824 {
825 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
826 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
827 }
828
829 /* We don't allow changing the order of the computation in the inner-loop
830 when doing outer-loop vectorization. */
831 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
832
833 return pattern_stmt;
834 }
835
836
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In
         the latter case DEF is used to replace that operand.  (It is defined
         by a pattern statement we created for the previous statement in the
         sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if
         not NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be
         created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the
         second one is the operation itself.  We return the type promotion
         statement in NEW_DEF_STMT and further store it in
         STMT_VINFO_PATTERN_DEF_STMT of the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple *new_def_stmt,
				  VEC (gimple, heap) **stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  bool first = false;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  /* Only shifts and bitwise logic with a constant RHS are handled.  */
  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
	  || !gimple_bb (def_stmt)
	  || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
	  || !vinfo_for_stmt (def_stmt))
        return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        if (!int_fits_type_p (const_oprnd, half_type))
          {
            /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
            if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
              return false;

            interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
            if (!int_fits_type_p (const_oprnd, interm_type))
              return false;
          }

        break;

      case LSHIFT_EXPR:
        /* Try intermediate type - HALF_TYPE is not enough for sure.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
           (e.g., if the original value was char, the shift amount is at most
           8 if we want to use short).  */
        if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      case RSHIFT_EXPR:
        if (vect_supportable_shift (code, half_type))
          break;

        /* Try intermediate type - HALF_TYPE is not supported.  */
        if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
          return false;

        interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

        if (!vect_supportable_shift (code, interm_type))
          return false;

        break;

      default:
        gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           promotion.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We
           store this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
        {
          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
            {
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
                return false;

              oprnd = gimple_assign_lhs (new_stmt);
            }
          else
            {
              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       oprnd, NULL_TREE);
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = new_oprnd;
            }
        }
      else
        {
          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);
        }
    }
  else
    {
      if (interm_type)
        {
          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                   oprnd, NULL_TREE);
          oprnd = new_oprnd;
          *new_def_stmt = new_stmt;
        }

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}
1049
1050
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.

   On success, return the pattern statement built for the last statement of
   the detected sequence, and set *TYPE_IN/*TYPE_OUT to the vector types to
   operate on.  Return NULL if no over-widening sequence is found.  */
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
{
  /* The search starts from the last statement collected so far.  */
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nuses = 0;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  bool first;
  struct loop *loop = (gimple_bb (stmt))->loop_father;

  /* Walk forward through the single-use chain S3, S4, ... creating a
     narrower pattern statement for each member until a statement that
     does not fit a smaller type is reached.  */
  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses
         (debug statements are ignored).  */
      lhs = gimple_assign_lhs (stmt);
      nuses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          if (is_gimple_debug (USE_STMT (use_p)))
            continue;
          use_stmt = USE_STMT (use_p);
          nuses++;
        }

      /* Require exactly one non-debug use, and that use must be an
         assignment inside the same loop.  */
      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add an original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
                                        op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      /* Advance along the use chain.  */
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type promotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
        return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);
        }
      else
        {
          /* The demotion is exactly to NEW_TYPE: reuse PREV_STMT's pattern
             def stmt for USE_STMT and keep the pattern stmt built above.  */
          if (prev_stmt)
            STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      VEC_safe_push (gimple, heap, *stmts, use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  return pattern_stmt;
}
1212
1213
1214 /* Mark statements that are involved in a pattern. */
1215
1216 static inline void
1217 vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
1218 tree pattern_vectype)
1219 {
1220 stmt_vec_info pattern_stmt_info, def_stmt_info;
1221 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
1223 gimple def_stmt;
1224
1225 set_vinfo_for_stmt (pattern_stmt,
1226 new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
1227 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1228 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
1229
1230 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
1231 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
1232 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1233 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
1234 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
1235 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
1236 STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
1237 = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
1238 if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
1239 {
1240 def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
1241 set_vinfo_for_stmt (def_stmt,
1242 new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
1243 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
1244 def_stmt_info = vinfo_for_stmt (def_stmt);
1245 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
1246 STMT_VINFO_DEF_TYPE (def_stmt_info)
1247 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1248 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
1249 }
1250 }
1251
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (
        gimple (* vect_recog_func) (VEC (gimple, heap) **, tree *, tree *),
        gimple_stmt_iterator si,
        VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  /* Seed the worklist with STMT and run the recognition function.  */
  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  /* The recognition function may have extended the sequence; the last
     element of STMTS_TO_REPLACE is the statement the pattern replaces.  */
  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      if (type_out)
        gcc_assert (VECTOR_MODE_P (TYPE_MODE (type_out)));
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  TYPE_IN/TYPE_OUT are scalar here; map them
         to vector types first.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
        return;
      if (type_out)
        type_out = get_vectype_for_scalar_type (type_out);
      else
        type_out = type_in;
      if (!type_out)
        return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
        code = gimple_assign_rhs_code (pattern_stmt);
      else
        {
          gcc_assert (is_gimple_call (pattern_stmt));
          code = CALL_EXPR;
        }

      /* The target must provide an optab for the operation, and the insn's
         output mode must match the requested output vector type.  */
      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
        return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  The last element was handled above, so stop one
     short of the end.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
              && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "additional pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
        }

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
1375
1376
1377 /* Function vect_pattern_recog
1378
1379 Input:
1380 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
1381 computation idioms.
1382
1383 Output - for each computation idiom that is detected we create a new stmt
1384 that provides the same functionality and that can be vectorized. We
1385 also record some information in the struct_stmt_info of the relevant
1386 stmts, as explained below:
1387
1388 At the entry to this function we have the following stmts, with the
1389 following initial value in the STMT_VINFO fields:
1390
1391 stmt in_pattern_p related_stmt vec_stmt
1392 S1: a_i = .... - - -
1393 S2: a_2 = ..use(a_i).. - - -
1394 S3: a_1 = ..use(a_2).. - - -
1395 S4: a_0 = ..use(a_1).. - - -
1396 S5: ... = ..use(a_0).. - - -
1397
1398 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
1399 represented by a single stmt. We then:
1400 - create a new stmt S6 equivalent to the pattern (the stmt is not
1401 inserted into the code)
1402 - fill in the STMT_VINFO fields as follows:
1403
1404 in_pattern_p related_stmt vec_stmt
1405 S1: a_i = .... - - -
1406 S2: a_2 = ..use(a_i).. - - -
1407 S3: a_1 = ..use(a_2).. - - -
1408 S4: a_0 = ..use(a_1).. true S6 -
1409 '---> S6: a_new = .... - S4 -
1410 S5: ... = ..use(a_0).. - - -
1411
1412 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
1413 to each other through the RELATED_STMT field).
1414
1415 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
1416 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
1417 remain irrelevant unless used by stmts other than S4.
1418
1419 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
1420 (because they are marked as irrelevant). It will vectorize S6, and record
1421 a pointer to the new vector stmt VS6 from S6 (as usual).
1422 S4 will be skipped, and S5 will be vectorized as usual:
1423
1424 in_pattern_p related_stmt vec_stmt
1425 S1: a_i = .... - - -
1426 S2: a_2 = ..use(a_i).. - - -
1427 S3: a_1 = ..use(a_2).. - - -
1428 > VS6: va_new = .... - - -
1429 S4: a_0 = ..use(a_1).. true S6 VS6
1430 '---> S6: a_new = .... - S4 VS6
1431 > VS5: ... = ..vuse(va_new).. - - -
1432 S5: ... = ..use(a_0).. - - -
1433
1434 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
1435 elsewhere), and we'll end up with:
1436
1437 VS6: va_new = ....
1438 VS5: ... = ..vuse(va_new)..
1439
1440 In case of more than one pattern statements, e.g., widen-mult with
1441 intermediate type:
1442
1443 S1 a_t = ;
1444 S2 a_T = (TYPE) a_t;
1445 '--> S3: a_it = (interm_type) a_t;
1446 S4 prod_T = a_T * CONST;
1447 '--> S5: prod_T' = a_it w* CONST;
1448
1449 there may be other users of a_T outside the pattern. In that case S2 will
1450 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
1451 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
1452 be recorded in S3. */
1453
1454 void
1455 vect_pattern_recog (loop_vec_info loop_vinfo)
1456 {
1457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1458 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1459 unsigned int nbbs = loop->num_nodes;
1460 gimple_stmt_iterator si;
1461 unsigned int i, j;
1462 gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
1463 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
1464
1465 if (vect_print_dump_info (REPORT_DETAILS))
1466 fprintf (vect_dump, "=== vect_pattern_recog ===");
1467
1468 /* Scan through the loop stmts, applying the pattern recognition
1469 functions starting at each stmt visited: */
1470 for (i = 0; i < nbbs; i++)
1471 {
1472 basic_block bb = bbs[i];
1473 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1474 {
1475 /* Scan over all generic vect_recog_xxx_pattern functions. */
1476 for (j = 0; j < NUM_PATTERNS; j++)
1477 {
1478 vect_recog_func_ptr = vect_vect_recog_func_ptrs[j];
1479 vect_pattern_recog_1 (vect_recog_func_ptr, si,
1480 &stmts_to_replace);
1481 }
1482 }
1483 }
1484
1485 VEC_free (gimple, heap, stmts_to_replace);
1486 }